/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2007-2009 Robert N. M. Watson
 * Copyright (c) 2010-2011 Juniper Networks, Inc.
 * All rights reserved.
 *
 * This software was developed by Robert N. M. Watson under contract
 * to Juniper Networks, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/*
 * netisr is a packet dispatch service, allowing synchronous (directly
 * dispatched) and asynchronous (deferred dispatch) processing of packets by
 * registered protocol handlers.  Callers pass a protocol identifier and
 * packet to netisr, along with a direct dispatch hint, and work will either
 * be immediately processed by the registered handler, or passed to a
 * software interrupt (SWI) thread for deferred dispatch.  Callers will
 * generally select one or the other based on:
 *
 * - Whether directly dispatching a netisr handler would lead to code
 *   reentrance or lock recursion, such as entering the socket code from the
 *   socket code.
 * - Whether directly dispatching a netisr handler would lead to recursive
 *   processing, such as when decapsulating several wrapped layers of tunnel
 *   information (IPSEC within IPSEC within ...).
 *
 * Maintaining ordering for protocol streams is a critical design concern.
 * Enforcing ordering limits the opportunity for concurrency, but maintains
 * the strong ordering requirements found in some protocols, such as TCP.  Of
 * related concern is CPU affinity--it is desirable to process all data
 * associated with a particular stream on the same CPU over time in order to
 * avoid acquiring locks associated with the connection on different CPUs,
 * keep connection data in one cache, and to generally encourage associated
 * user threads to live on the same CPU as the stream.  It's also desirable
 * to avoid lock migration and contention where locks are associated with
 * more than one flow.
 *
 * netisr supports several policy variations, represented by the
 * NETISR_POLICY_* constants, allowing protocols to play various roles in
 * identifying flows, assigning work to CPUs, etc.  These are described in
 * netisr.h.
 */
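/*
 * Illustrative sketch (not part of the original file): once a packet has
 * been classified, a caller chooses between the two hand-off styles
 * described above.  netisr_dispatch() may run the handler in the calling
 * context, subject to the dispatch policy, while netisr_queue() always
 * defers to the protocol's SWI thread:
 *
 *	netisr_dispatch(NETISR_FOO, m);		direct dispatch permitted
 *	netisr_queue(NETISR_FOO, m);		always deferred
 *
 * NETISR_FOO is a hypothetical protocol number used only for the example;
 * real values are defined in net/netisr.h.
 */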
#include "opt_ddb.h"
#include "opt_device_polling.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/interrupt.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/rmlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#define _WANT_NETISR_INTERNAL	/* Enable definitions from netisr_internal.h */
#include <net/if.h>
#include <net/if_var.h>
#include <net/netisr.h>
#include <net/netisr_internal.h>
#include <net/vnet.h>

/*-
 * Synchronize use and modification of the registered netisr data structures;
 * acquire a write lock while modifying the set of registered protocols to
 * prevent partially registered or unregistered protocols from being run.
 *
 * The following data structures and fields are protected by this lock:
 *
 * - The netisr_proto array, including all fields of struct netisr_proto.
 * - The nws array, including all fields of struct netisr_worker.
 * - The nws_array array.
 *
 * Note: the NETISR_LOCKING define controls whether read locks are acquired
 * in packet processing paths requiring netisr registration stability.  This
 * is disabled by default as it can lead to measurable performance
 * degradation even with rmlocks (3%-6% for loopback ping-pong traffic), and
 * because netisr registration and unregistration is extremely rare at
 * runtime.  If it becomes more common, this decision should be revisited.
 *
 * XXXRW: rmlocks don't support assertions.
 */
static struct rmlock netisr_rmlock;
#define NETISR_LOCK_INIT()      rm_init_flags(&netisr_rmlock, "netisr", \
                                    RM_NOWITNESS)
#define NETISR_LOCK_ASSERT()
#define NETISR_RLOCK(tracker)   rm_rlock(&netisr_rmlock, (tracker))
#define NETISR_RUNLOCK(tracker) rm_runlock(&netisr_rmlock, (tracker))
#define NETISR_WLOCK()          rm_wlock(&netisr_rmlock)
#define NETISR_WUNLOCK()        rm_wunlock(&netisr_rmlock)
/* #define NETISR_LOCKING */

static SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr");

/*-
 * Three global direct dispatch policies are supported:
 *
 * NETISR_DISPATCH_DEFERRED: All work is deferred for a netisr, regardless of
 * context (may be overridden by protocols).
 *
 * NETISR_DISPATCH_HYBRID: If the executing context allows direct dispatch,
 * and we're running on the CPU the work would be performed on, then direct
 * dispatch it if it wouldn't violate ordering constraints on the workstream.
 *
 * NETISR_DISPATCH_DIRECT: If the executing context allows direct dispatch,
 * always direct dispatch.  (The default.)
 *
 * Notice that changing the global policy could lead to short periods of
 * misordered processing, but this is considered acceptable as compared to
 * the complexity of enforcing ordering during policy changes.  Protocols can
 * override the global policy (when they're not doing that, they select
 * NETISR_DISPATCH_DEFAULT).
 */
#define NETISR_DISPATCH_POLICY_DEFAULT  NETISR_DISPATCH_DIRECT
#define NETISR_DISPATCH_POLICY_MAXSTR   20 /* Used for temporary buffers. */
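/*
 * Usage sketch (assumption, not from the original file): the global policy
 * is exposed through the net.isr.dispatch tunable/sysctl declared below, so
 * it can be selected at boot or at runtime, for example:
 *
 *	net.isr.dispatch="deferred"		in /boot/loader.conf
 *	# sysctl net.isr.dispatch=deferred	at runtime
 *
 * "default" is rejected by the handler, since it is only meaningful as a
 * per-protocol setting.
 */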
static u_int netisr_dispatch_policy = NETISR_DISPATCH_POLICY_DEFAULT;
static int sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_net_isr, OID_AUTO, dispatch, CTLTYPE_STRING | CTLFLAG_RWTUN,
    0, 0, sysctl_netisr_dispatch_policy, "A",
    "netisr dispatch policy");

/*
 * Allow the administrator to limit the number of threads (CPUs) to use for
 * netisr.  We don't check netisr_maxthreads before creating the thread for
 * CPU 0; this must be set at boot, and we will create at most one thread per
 * CPU.  By default we initialize this to 1, which assigns just one CPU
 * (CPU 0) and therefore only one workstream.  If set to -1, netisr will use
 * all CPUs (mp_ncpus) and have that many workstreams -- one workstream per
 * thread (CPU).
 */
static int netisr_maxthreads = 1;	/* Max number of threads. */
SYSCTL_INT(_net_isr, OID_AUTO, maxthreads, CTLFLAG_RDTUN,
    &netisr_maxthreads, 0,
    "Use at most this many CPUs for netisr processing");

static int netisr_bindthreads = 0;	/* Bind threads to CPUs. */
SYSCTL_INT(_net_isr, OID_AUTO, bindthreads, CTLFLAG_RDTUN,
    &netisr_bindthreads, 0, "Bind netisr threads to CPUs.");

/*
 * Limit per-workstream mbuf queue limits to at most net.isr.maxqlimit, both
 * for initial configuration and later modification using
 * netisr_setqlimit().
 */
#define NETISR_DEFAULT_MAXQLIMIT        10240
static u_int netisr_maxqlimit = NETISR_DEFAULT_MAXQLIMIT;
SYSCTL_UINT(_net_isr, OID_AUTO, maxqlimit, CTLFLAG_RDTUN,
    &netisr_maxqlimit, 0,
    "Maximum netisr per-protocol, per-CPU queue depth.");

/*
 * The default per-workstream mbuf queue limit for protocols that don't
 * initialize the nh_qlimit field of their struct netisr_handler.  If this is
 * set above netisr_maxqlimit, we truncate it to the maximum during boot.
 */
#define NETISR_DEFAULT_DEFAULTQLIMIT    256
static u_int netisr_defaultqlimit = NETISR_DEFAULT_DEFAULTQLIMIT;
SYSCTL_UINT(_net_isr, OID_AUTO, defaultqlimit, CTLFLAG_RDTUN,
    &netisr_defaultqlimit, 0,
    "Default netisr per-protocol, per-CPU queue limit if not set by protocol");

/*
 * Store and export the compile-time constant NETISR_MAXPROT limit on the
 * number of protocols that can register with netisr at a time.  This is
 * required for crashdump analysis, as it sizes netisr_proto[].
 */
static u_int netisr_maxprot = NETISR_MAXPROT;
SYSCTL_UINT(_net_isr, OID_AUTO, maxprot, CTLFLAG_RD,
    &netisr_maxprot, 0,
    "Compile-time limit on the number of protocols supported by netisr.");

/*
 * The netisr_proto array describes all registered protocols, indexed by
 * protocol number.  See netisr_internal.h for more details.
 */
static struct netisr_proto netisr_proto[NETISR_MAXPROT];

#ifdef VIMAGE
/*
 * The netisr_enable array describes a per-VNET flag for registered
 * protocols on whether this netisr is active in this VNET or not.
 * netisr_register() will automatically enable the netisr for the
 * default VNET and all currently active instances.
 * netisr_unregister() will disable all active VNETs, including vnet0.
 * Individual network stack instances can be enabled/disabled by the
 * netisr_(un)register_vnet() functions.
 * With this we keep the one netisr_proto per protocol but add a
 * mechanism to stop netisr processing for vnet teardown.
 * Apart from that we expect a VNET to always be enabled.
 */
VNET_DEFINE_STATIC(u_int, netisr_enable[NETISR_MAXPROT]);
#define V_netisr_enable VNET(netisr_enable)
#endif

/*
 * Per-CPU workstream data.  See netisr_internal.h for more details.
 */
DPCPU_DEFINE(struct netisr_workstream, nws);

/*
 * Map contiguous values between 0 and nws_count into CPU IDs appropriate for
 * accessing workstreams.  This allows constructions of the form
 * DPCPU_ID_GET(nws_array[arbitraryvalue % nws_count], nws).
 */
static u_int nws_array[MAXCPU];

/*
 * Number of registered workstreams.  Will be at most the number of running
 * CPUs once fully started.
 */
static u_int nws_count;
SYSCTL_UINT(_net_isr, OID_AUTO, numthreads, CTLFLAG_RD,
    &nws_count, 0, "Number of extant netisr threads.");

/*
 * Synchronization for each workstream: a mutex protects all mutable fields
 * in each stream, including per-protocol state (mbuf queues).  The SWI is
 * woken up if asynchronous dispatch is required.
 */
#define NWS_LOCK(s)             mtx_lock(&(s)->nws_mtx)
#define NWS_LOCK_ASSERT(s)      mtx_assert(&(s)->nws_mtx, MA_OWNED)
#define NWS_UNLOCK(s)           mtx_unlock(&(s)->nws_mtx)
#define NWS_SIGNAL(s)           swi_sched((s)->nws_swi_cookie, 0)

/*
 * Utility routines for protocols that implement their own mapping of flows
 * to CPUs.
 */
u_int
netisr_get_cpucount(void)
{

    return (nws_count);
}

u_int
netisr_get_cpuid(u_int cpunumber)
{

    return (nws_array[cpunumber % nws_count]);
}

/*
 * The default implementation of flow -> CPU ID mapping.
 *
 * Non-static so that protocols can use it to map their own work to specific
 * CPUs in a manner consistent with netisr for affinity purposes.
 */
u_int
netisr_default_flow2cpu(u_int flowid)
{

    return (nws_array[flowid % nws_count]);
}
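/*
 * Usage sketch (assumption, not from the original file): a protocol that
 * schedules follow-up work for a flow, such as a per-flow callout, can use
 * the helpers above to keep that work on the CPU where netisr would process
 * the flow:
 *
 *	u_int cpu;
 *
 *	cpu = netisr_default_flow2cpu(m->m_pkthdr.flowid);
 *	(bind the follow-up work to 'cpu')
 */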
/*
 * Dispatch tunable and sysctl configuration.
 */
struct netisr_dispatch_table_entry {
    u_int        ndte_policy;
    const char  *ndte_policy_str;
};
static const struct netisr_dispatch_table_entry netisr_dispatch_table[] = {
    { NETISR_DISPATCH_DEFAULT, "default" },
    { NETISR_DISPATCH_DEFERRED, "deferred" },
    { NETISR_DISPATCH_HYBRID, "hybrid" },
    { NETISR_DISPATCH_DIRECT, "direct" },
};

static void
netisr_dispatch_policy_to_str(u_int dispatch_policy, char *buffer,
    u_int buflen)
{
    const struct netisr_dispatch_table_entry *ndtep;
    const char *str;
    u_int i;

    str = "unknown";
    for (i = 0; i < nitems(netisr_dispatch_table); i++) {
        ndtep = &netisr_dispatch_table[i];
        if (ndtep->ndte_policy == dispatch_policy) {
            str = ndtep->ndte_policy_str;
            break;
        }
    }
    snprintf(buffer, buflen, "%s", str);
}

static int
netisr_dispatch_policy_from_str(const char *str, u_int *dispatch_policyp)
{
    const struct netisr_dispatch_table_entry *ndtep;
    u_int i;

    for (i = 0; i < nitems(netisr_dispatch_table); i++) {
        ndtep = &netisr_dispatch_table[i];
        if (strcmp(ndtep->ndte_policy_str, str) == 0) {
            *dispatch_policyp = ndtep->ndte_policy;
            return (0);
        }
    }
    return (EINVAL);
}

static int
sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS)
{
    char tmp[NETISR_DISPATCH_POLICY_MAXSTR];
    u_int dispatch_policy;
    int error;

    netisr_dispatch_policy_to_str(netisr_dispatch_policy, tmp,
        sizeof(tmp));
    error = sysctl_handle_string(oidp, tmp, sizeof(tmp), req);
    if (error == 0 && req->newptr != NULL) {
        error = netisr_dispatch_policy_from_str(tmp,
            &dispatch_policy);
        if (error == 0 && dispatch_policy == NETISR_DISPATCH_DEFAULT)
            error = EINVAL;
        if (error == 0)
            netisr_dispatch_policy = dispatch_policy;
    }
    return (error);
}

/*
 * Register a new netisr handler, which requires initializing per-protocol
 * fields for each workstream.  All netisr work is briefly suspended while
 * the protocol is installed.
 */
void
netisr_register(const struct netisr_handler *nhp)
{
    VNET_ITERATOR_DECL(vnet_iter);
    struct netisr_work *npwp;
    const char *name;
    u_int i, proto;

    proto = nhp->nh_proto;
    name = nhp->nh_name;

    /*
     * Test that the requested registration is valid.
     */
    KASSERT(nhp->nh_name != NULL,
        ("%s: nh_name NULL for %u", __func__, proto));
    KASSERT(nhp->nh_handler != NULL,
        ("%s: nh_handler NULL for %s", __func__, name));
    KASSERT(nhp->nh_policy == NETISR_POLICY_SOURCE ||
        nhp->nh_policy == NETISR_POLICY_FLOW ||
        nhp->nh_policy == NETISR_POLICY_CPU,
        ("%s: unsupported nh_policy %u for %s", __func__,
        nhp->nh_policy, name));
    KASSERT(nhp->nh_policy == NETISR_POLICY_FLOW ||
        nhp->nh_m2flow == NULL,
        ("%s: nh_policy != FLOW but m2flow defined for %s", __func__,
        name));
    KASSERT(nhp->nh_policy == NETISR_POLICY_CPU || nhp->nh_m2cpuid == NULL,
        ("%s: nh_policy != CPU but m2cpuid defined for %s", __func__,
        name));
    KASSERT(nhp->nh_policy != NETISR_POLICY_CPU || nhp->nh_m2cpuid != NULL,
        ("%s: nh_policy == CPU but m2cpuid not defined for %s", __func__,
        name));
    KASSERT(nhp->nh_dispatch == NETISR_DISPATCH_DEFAULT ||
        nhp->nh_dispatch == NETISR_DISPATCH_DEFERRED ||
        nhp->nh_dispatch == NETISR_DISPATCH_HYBRID ||
        nhp->nh_dispatch == NETISR_DISPATCH_DIRECT,
        ("%s: invalid nh_dispatch (%u)", __func__, nhp->nh_dispatch));

    KASSERT(proto < NETISR_MAXPROT,
        ("%s(%u, %s): protocol too big", __func__, proto, name));

    /*
     * Test that no existing registration exists for this protocol.
     */
    NETISR_WLOCK();
    KASSERT(netisr_proto[proto].np_name == NULL,
        ("%s(%u, %s): name present", __func__, proto, name));
    KASSERT(netisr_proto[proto].np_handler == NULL,
        ("%s(%u, %s): handler present", __func__, proto, name));

    netisr_proto[proto].np_name = name;
    netisr_proto[proto].np_handler = nhp->nh_handler;
    netisr_proto[proto].np_m2flow = nhp->nh_m2flow;
    netisr_proto[proto].np_m2cpuid = nhp->nh_m2cpuid;
    netisr_proto[proto].np_drainedcpu = nhp->nh_drainedcpu;
    if (nhp->nh_qlimit == 0)
        netisr_proto[proto].np_qlimit = netisr_defaultqlimit;
    else if (nhp->nh_qlimit > netisr_maxqlimit) {
        printf("%s: %s requested queue limit %u capped to "
            "net.isr.maxqlimit %u\n", __func__, name, nhp->nh_qlimit,
            netisr_maxqlimit);
        netisr_proto[proto].np_qlimit = netisr_maxqlimit;
    } else
        netisr_proto[proto].np_qlimit = nhp->nh_qlimit;
    netisr_proto[proto].np_policy = nhp->nh_policy;
    netisr_proto[proto].np_dispatch = nhp->nh_dispatch;
    CPU_FOREACH(i) {
        npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
        bzero(npwp, sizeof(*npwp));
        npwp->nw_qlimit = netisr_proto[proto].np_qlimit;
    }

#ifdef VIMAGE
    /*
     * Test that we are in vnet0 and have a curvnet set.
     */
    KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__));
    KASSERT(IS_DEFAULT_VNET(curvnet), ("%s: curvnet %p is not vnet0 %p",
        __func__, curvnet, vnet0));
    VNET_LIST_RLOCK_NOSLEEP();
    VNET_FOREACH(vnet_iter) {
        CURVNET_SET(vnet_iter);
        V_netisr_enable[proto] = 1;
        CURVNET_RESTORE();
    }
    VNET_LIST_RUNLOCK_NOSLEEP();
#endif
    NETISR_WUNLOCK();
}
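/*
 * Registration sketch (hypothetical protocol, not part of the original
 * file): a protocol module typically describes itself with a
 * struct netisr_handler and registers it during initialization.  NETISR_FOO
 * and foo_input() are assumptions made for the example:
 *
 *	static const struct netisr_handler foo_nh = {
 *		.nh_name = "foo",
 *		.nh_handler = foo_input,
 *		.nh_proto = NETISR_FOO,
 *		.nh_policy = NETISR_POLICY_FLOW,
 *		.nh_dispatch = NETISR_DISPATCH_DEFAULT,
 *	};
 *
 *	netisr_register(&foo_nh);
 *
 * Leaving nh_qlimit at zero selects net.isr.defaultqlimit, as implemented
 * above; netisr_unregister(&foo_nh) reverses the registration.
 */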
/*
 * Clear drop counters across all workstreams for a protocol.
 */
void
netisr_clearqdrops(const struct netisr_handler *nhp)
{
    struct netisr_work *npwp;
#ifdef INVARIANTS
    const char *name;
#endif
    u_int i, proto;

    proto = nhp->nh_proto;
#ifdef INVARIANTS
    name = nhp->nh_name;
#endif
    KASSERT(proto < NETISR_MAXPROT,
        ("%s(%u): protocol too big for %s", __func__, proto, name));

    NETISR_WLOCK();
    KASSERT(netisr_proto[proto].np_handler != NULL,
        ("%s(%u): protocol not registered for %s", __func__, proto,
        name));

    CPU_FOREACH(i) {
        npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
        npwp->nw_qdrops = 0;
    }
    NETISR_WUNLOCK();
}

/*
 * Query current drop counters across all workstreams for a protocol.
 */
void
netisr_getqdrops(const struct netisr_handler *nhp, u_int64_t *qdropp)
{
    struct netisr_work *npwp;
    struct rm_priotracker tracker;
#ifdef INVARIANTS
    const char *name;
#endif
    u_int i, proto;

    *qdropp = 0;
    proto = nhp->nh_proto;
#ifdef INVARIANTS
    name = nhp->nh_name;
#endif
    KASSERT(proto < NETISR_MAXPROT,
        ("%s(%u): protocol too big for %s", __func__, proto, name));

    NETISR_RLOCK(&tracker);
    KASSERT(netisr_proto[proto].np_handler != NULL,
        ("%s(%u): protocol not registered for %s", __func__, proto,
        name));

    CPU_FOREACH(i) {
        npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
        *qdropp += npwp->nw_qdrops;
    }
    NETISR_RUNLOCK(&tracker);
}

/*
 * Query current per-workstream queue limit for a protocol.
 */
void
netisr_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp)
{
    struct rm_priotracker tracker;
#ifdef INVARIANTS
    const char *name;
#endif
    u_int proto;

    proto = nhp->nh_proto;
#ifdef INVARIANTS
    name = nhp->nh_name;
#endif
    KASSERT(proto < NETISR_MAXPROT,
        ("%s(%u): protocol too big for %s", __func__, proto, name));

    NETISR_RLOCK(&tracker);
    KASSERT(netisr_proto[proto].np_handler != NULL,
        ("%s(%u): protocol not registered for %s", __func__, proto,
        name));
    *qlimitp = netisr_proto[proto].np_qlimit;
    NETISR_RUNLOCK(&tracker);
}

/*
 * Update the queue limit across per-workstream queues for a protocol.  We
 * simply change the limits, and don't drain overflowed packets as they will
 * (hopefully) take care of themselves shortly.
 */
int
netisr_setqlimit(const struct netisr_handler *nhp, u_int qlimit)
{
    struct netisr_work *npwp;
#ifdef INVARIANTS
    const char *name;
#endif
    u_int i, proto;

    if (qlimit > netisr_maxqlimit)
        return (EINVAL);

    proto = nhp->nh_proto;
#ifdef INVARIANTS
    name = nhp->nh_name;
#endif
    KASSERT(proto < NETISR_MAXPROT,
        ("%s(%u): protocol too big for %s", __func__, proto, name));

    NETISR_WLOCK();
    KASSERT(netisr_proto[proto].np_handler != NULL,
        ("%s(%u): protocol not registered for %s", __func__, proto,
        name));

    netisr_proto[proto].np_qlimit = qlimit;
    CPU_FOREACH(i) {
        npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
        npwp->nw_qlimit = qlimit;
    }
    NETISR_WUNLOCK();
    return (0);
}
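/*
 * Usage sketch (assumption): a protocol can inspect and tune its queues at
 * runtime with the accessors above, for example growing its per-CPU queues
 * when drops are observed.  foo_nh is the hypothetical handler from the
 * registration example earlier:
 *
 *	u_int64_t drops;
 *
 *	netisr_getqdrops(&foo_nh, &drops);
 *	if (drops != 0)
 *		(void)netisr_setqlimit(&foo_nh, 2048);
 *
 * netisr_setqlimit() rejects requests above net.isr.maxqlimit with EINVAL.
 */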
/*
 * Drain all packets currently held in a particular protocol work queue.
 */
static void
netisr_drain_proto(struct netisr_work *npwp)
{
    struct mbuf *m;

    /*
     * We would assert the lock on the workstream but it's not passed in.
     */
    while ((m = npwp->nw_head) != NULL) {
        npwp->nw_head = m->m_nextpkt;
        m->m_nextpkt = NULL;
        if (npwp->nw_head == NULL)
            npwp->nw_tail = NULL;
        npwp->nw_len--;
        m_freem(m);
    }
    KASSERT(npwp->nw_tail == NULL, ("%s: tail", __func__));
    KASSERT(npwp->nw_len == 0, ("%s: len", __func__));
}

/*
 * Remove the registration of a network protocol, which requires clearing
 * per-protocol fields across all workstreams, including freeing all mbufs in
 * the queues at time of unregister.  All work in netisr is briefly suspended
 * while this takes place.
 */
void
netisr_unregister(const struct netisr_handler *nhp)
{
    VNET_ITERATOR_DECL(vnet_iter);
    struct netisr_work *npwp;
#ifdef INVARIANTS
    const char *name;
#endif
    u_int i, proto;

    proto = nhp->nh_proto;
#ifdef INVARIANTS
    name = nhp->nh_name;
#endif
    KASSERT(proto < NETISR_MAXPROT,
        ("%s(%u): protocol too big for %s", __func__, proto, name));

    NETISR_WLOCK();
    KASSERT(netisr_proto[proto].np_handler != NULL,
        ("%s(%u): protocol not registered for %s", __func__, proto,
        name));

#ifdef VIMAGE
    VNET_LIST_RLOCK_NOSLEEP();
    VNET_FOREACH(vnet_iter) {
        CURVNET_SET(vnet_iter);
        V_netisr_enable[proto] = 0;
        CURVNET_RESTORE();
    }
    VNET_LIST_RUNLOCK_NOSLEEP();
#endif

    netisr_proto[proto].np_name = NULL;
    netisr_proto[proto].np_handler = NULL;
    netisr_proto[proto].np_m2flow = NULL;
    netisr_proto[proto].np_m2cpuid = NULL;
    netisr_proto[proto].np_qlimit = 0;
    netisr_proto[proto].np_policy = 0;
    CPU_FOREACH(i) {
        npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
        netisr_drain_proto(npwp);
        bzero(npwp, sizeof(*npwp));
    }
    NETISR_WUNLOCK();
}

#ifdef VIMAGE
void
netisr_register_vnet(const struct netisr_handler *nhp)
{
    u_int proto;

    proto = nhp->nh_proto;

    KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__));
    KASSERT(proto < NETISR_MAXPROT,
        ("%s(%u): protocol too big for %s", __func__, proto, nhp->nh_name));
    NETISR_WLOCK();
    KASSERT(netisr_proto[proto].np_handler != NULL,
        ("%s(%u): protocol not registered for %s", __func__, proto,
        nhp->nh_name));

    V_netisr_enable[proto] = 1;
    NETISR_WUNLOCK();
}
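/*
 * Usage sketch (assumption, not part of the original file): a VNET-aware
 * protocol performs the global netisr_register() once and then calls the
 * per-VNET hooks from its network stack instance setup and teardown paths,
 * with curvnet set to the instance in question:
 *
 *	CURVNET_SET(vnet);
 *	netisr_register_vnet(&foo_nh);
 *	CURVNET_RESTORE();
 */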
static void
netisr_drain_proto_vnet(struct vnet *vnet, u_int proto)
{
    struct netisr_workstream *nwsp;
    struct netisr_work *npwp;
    struct mbuf *m, *mp, *n, *ne;
    u_int i;

    KASSERT(vnet != NULL, ("%s: vnet is NULL", __func__));
    NETISR_LOCK_ASSERT();

    CPU_FOREACH(i) {
        nwsp = DPCPU_ID_PTR(i, nws);
        if (nwsp->nws_intr_event == NULL)
            continue;
        npwp = &nwsp->nws_work[proto];
        NWS_LOCK(nwsp);

        /*
         * Rather than dissecting and removing mbufs from the middle
         * of the chain, we build a new chain if the packet stays and
         * update the head and tail pointers at the end.  All packets
         * matching the given vnet are freed.
         */
        m = npwp->nw_head;
        n = ne = NULL;
        while (m != NULL) {
            mp = m;
            m = m->m_nextpkt;
            mp->m_nextpkt = NULL;
            if (mp->m_pkthdr.rcvif->if_vnet != vnet) {
                if (n == NULL) {
                    n = ne = mp;
                } else {
                    ne->m_nextpkt = mp;
                    ne = mp;
                }
                continue;
            }
            /* This is a packet in the selected vnet.  Free it. */
            npwp->nw_len--;
            m_freem(mp);
        }
        npwp->nw_head = n;
        npwp->nw_tail = ne;
        NWS_UNLOCK(nwsp);
    }
}

void
netisr_unregister_vnet(const struct netisr_handler *nhp)
{
    u_int proto;

    proto = nhp->nh_proto;

    KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__));
    KASSERT(proto < NETISR_MAXPROT,
        ("%s(%u): protocol too big for %s", __func__, proto, nhp->nh_name));
    NETISR_WLOCK();
    KASSERT(netisr_proto[proto].np_handler != NULL,
        ("%s(%u): protocol not registered for %s", __func__, proto,
        nhp->nh_name));

    V_netisr_enable[proto] = 0;

    netisr_drain_proto_vnet(curvnet, proto);
    NETISR_WUNLOCK();
}
#endif

/*
 * Compose the global and per-protocol policies on dispatch, and return the
 * dispatch policy to use.
 */
static u_int
netisr_get_dispatch(struct netisr_proto *npp)
{

    /*
     * Protocol-specific configuration overrides the global default.
     */
    if (npp->np_dispatch != NETISR_DISPATCH_DEFAULT)
        return (npp->np_dispatch);
    return (netisr_dispatch_policy);
}

/*
 * Look up the workstream given a packet and source identifier.  Do this by
 * checking the protocol's policy, and optionally call out to the protocol
 * for assistance if required.
 */
static struct mbuf *
netisr_select_cpuid(struct netisr_proto *npp, u_int dispatch_policy,
    uintptr_t source, struct mbuf *m, u_int *cpuidp)
{
    struct ifnet *ifp;
    u_int policy;

    NETISR_LOCK_ASSERT();

    /*
     * In the event we have only one worker, shortcut and deliver to it
     * without further ado.
     */
    if (nws_count == 1) {
        *cpuidp = nws_array[0];
        return (m);
    }

    /*
     * What happens next depends on the policy selected by the protocol.
     * If we want to support per-interface policies, we should do that
     * here first.
     */
    policy = npp->np_policy;
    if (policy == NETISR_POLICY_CPU) {
        m = npp->np_m2cpuid(m, source, cpuidp);
        if (m == NULL)
            return (NULL);

        /*
         * It's possible for a protocol not to have a good idea about
         * where to process a packet, in which case we fall back on
         * the netisr code to decide.  In the hybrid case, return the
         * current CPU ID, which will force an immediate direct
         * dispatch.  In the queued case, fall back on the SOURCE
         * policy.
         */
        if (*cpuidp != NETISR_CPUID_NONE) {
            *cpuidp = netisr_get_cpuid(*cpuidp);
            return (m);
        }
        if (dispatch_policy == NETISR_DISPATCH_HYBRID) {
            *cpuidp = netisr_get_cpuid(curcpu);
            return (m);
        }
        policy = NETISR_POLICY_SOURCE;
    }

    if (policy == NETISR_POLICY_FLOW) {
        if (M_HASHTYPE_GET(m) == M_HASHTYPE_NONE &&
            npp->np_m2flow != NULL) {
            m = npp->np_m2flow(m, source);
            if (m == NULL)
                return (NULL);
        }
        if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
            *cpuidp =
                netisr_default_flow2cpu(m->m_pkthdr.flowid);
            return (m);
        }
        policy = NETISR_POLICY_SOURCE;
    }

    KASSERT(policy == NETISR_POLICY_SOURCE,
        ("%s: invalid policy %u for %s", __func__, npp->np_policy,
        npp->np_name));

    MPASS((m->m_pkthdr.csum_flags & CSUM_SND_TAG) == 0);
    ifp = m->m_pkthdr.rcvif;
    if (ifp != NULL)
        *cpuidp = nws_array[(ifp->if_index + source) % nws_count];
    else
        *cpuidp = nws_array[source % nws_count];
    return (m);
}
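/*
 * Sketch of an nh_m2flow callback (assumption, not part of the original
 * file): a NETISR_POLICY_FLOW protocol whose packets may arrive without a
 * hardware-assigned hash can compute a software flow ID so that the code
 * above maps the packet to a consistent CPU.  foo_hash() is a hypothetical
 * protocol-specific hash:
 *
 *	static struct mbuf *
 *	foo_m2flow(struct mbuf *m, uintptr_t source)
 *	{
 *
 *		m->m_pkthdr.flowid = foo_hash(m);
 *		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
 *		return (m);
 *	}
 */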
/*
 * Process packets associated with a workstream and protocol.  For reasons of
 * fairness, we process up to one complete netisr queue at a time, moving the
 * queue to a stack-local queue for processing, but do not loop refreshing
 * from the global queue.  The caller is responsible for deciding whether to
 * loop, and for setting the NWS_RUNNING flag.  The passed workstream will be
 * locked on entry and relocked before return, but will be released while
 * processing.  The number of packets processed is returned.
 */
static u_int
netisr_process_workstream_proto(struct netisr_workstream *nwsp, u_int proto)
{
    struct netisr_work local_npw, *npwp;
    u_int handled;
    struct mbuf *m;

    NETISR_LOCK_ASSERT();
    NWS_LOCK_ASSERT(nwsp);

    KASSERT(nwsp->nws_flags & NWS_RUNNING,
        ("%s(%u): not running", __func__, proto));
    KASSERT(proto >= 0 && proto < NETISR_MAXPROT,
        ("%s(%u): invalid proto\n", __func__, proto));

    npwp = &nwsp->nws_work[proto];
    if (npwp->nw_len == 0)
        return (0);

    /*
     * Move the global work queue to a thread-local work queue.
     *
     * Notice that this means the effective maximum length of the queue
     * is actually twice that of the maximum queue length specified in
     * the protocol registration call.
     */
    handled = npwp->nw_len;
    local_npw = *npwp;
    npwp->nw_head = NULL;
    npwp->nw_tail = NULL;
    npwp->nw_len = 0;
    nwsp->nws_pendingbits &= ~(1 << proto);
    NWS_UNLOCK(nwsp);
    while ((m = local_npw.nw_head) != NULL) {
        local_npw.nw_head = m->m_nextpkt;
        m->m_nextpkt = NULL;
        if (local_npw.nw_head == NULL)
            local_npw.nw_tail = NULL;
        local_npw.nw_len--;
        VNET_ASSERT(m->m_pkthdr.rcvif != NULL,
            ("%s:%d rcvif == NULL: m=%p", __func__, __LINE__, m));
        CURVNET_SET(m->m_pkthdr.rcvif->if_vnet);
        netisr_proto[proto].np_handler(m);
        CURVNET_RESTORE();
    }
    KASSERT(local_npw.nw_len == 0,
        ("%s(%u): len %u", __func__, proto, local_npw.nw_len));
    if (netisr_proto[proto].np_drainedcpu)
        netisr_proto[proto].np_drainedcpu(nwsp->nws_cpu);
    NWS_LOCK(nwsp);
    npwp->nw_handled += handled;
    return (handled);
}

/*
 * SWI handler for netisr -- processes packets in a set of workstreams that
 * it owns, woken up by calls to NWS_SIGNAL().  If this workstream is already
 * being direct dispatched, go back to sleep and wait for the dispatching
 * thread to wake us up again.
 */
static void
swi_net(void *arg)
{
    struct epoch_tracker et;
#ifdef NETISR_LOCKING
    struct rm_priotracker tracker;
#endif
    struct netisr_workstream *nwsp;
    u_int bits, prot;

    nwsp = arg;

#ifdef DEVICE_POLLING
    KASSERT(nws_count == 1,
        ("%s: device_polling but nws_count != 1", __func__));
    NET_EPOCH_ENTER(et);
    netisr_poll();
    NET_EPOCH_EXIT(et);
#endif
#ifdef NETISR_LOCKING
    NETISR_RLOCK(&tracker);
#endif
    NWS_LOCK(nwsp);
    KASSERT(!(nwsp->nws_flags & NWS_RUNNING), ("swi_net: running"));
    if (nwsp->nws_flags & NWS_DISPATCHING)
        goto out;
    NET_EPOCH_ENTER(et);
    nwsp->nws_flags |= NWS_RUNNING;
    nwsp->nws_flags &= ~NWS_SCHEDULED;
    while ((bits = nwsp->nws_pendingbits) != 0) {
        while ((prot = ffs(bits)) != 0) {
            prot--;
            bits &= ~(1 << prot);
            (void)netisr_process_workstream_proto(nwsp, prot);
        }
    }
    nwsp->nws_flags &= ~NWS_RUNNING;
    NET_EPOCH_EXIT(et);
out:
    NWS_UNLOCK(nwsp);
#ifdef NETISR_LOCKING
    NETISR_RUNLOCK(&tracker);
#endif
#ifdef DEVICE_POLLING
    netisr_pollmore();
#endif
}

static int
netisr_queue_workstream(struct netisr_workstream *nwsp, u_int proto,
    struct netisr_work *npwp, struct mbuf *m, int *dosignalp)
{

    NWS_LOCK_ASSERT(nwsp);

    *dosignalp = 0;
    if (npwp->nw_len < npwp->nw_qlimit) {
        m->m_nextpkt = NULL;
        if (npwp->nw_head == NULL) {
            npwp->nw_head = m;
            npwp->nw_tail = m;
        } else {
            npwp->nw_tail->m_nextpkt = m;
            npwp->nw_tail = m;
        }
        npwp->nw_len++;
        if (npwp->nw_len > npwp->nw_watermark)
            npwp->nw_watermark = npwp->nw_len;

        /*
         * We must set the bit regardless of NWS_RUNNING, so that
         * swi_net() keeps calling netisr_process_workstream_proto().
         */
        nwsp->nws_pendingbits |= (1 << proto);
        if (!(nwsp->nws_flags &
            (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED))) {
            nwsp->nws_flags |= NWS_SCHEDULED;
            *dosignalp = 1;	/* Defer until unlocked. */
        }
        npwp->nw_queued++;
        return (0);
    } else {
        m_freem(m);
        npwp->nw_qdrops++;
        return (ENOBUFS);
    }
}

static int
netisr_queue_internal(u_int proto, struct mbuf *m, u_int cpuid)
{
    struct netisr_workstream *nwsp;
    struct netisr_work *npwp;
    int dosignal, error;

#ifdef NETISR_LOCKING
    NETISR_LOCK_ASSERT();
#endif
    KASSERT(cpuid <= mp_maxid, ("%s: cpuid too big (%u, %u)", __func__,
        cpuid, mp_maxid));
    KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));

    dosignal = 0;
    error = 0;
    nwsp = DPCPU_ID_PTR(cpuid, nws);
    npwp = &nwsp->nws_work[proto];
    NWS_LOCK(nwsp);
    error = netisr_queue_workstream(nwsp, proto, npwp, m, &dosignal);
    NWS_UNLOCK(nwsp);
    if (dosignal)
        NWS_SIGNAL(nwsp);
    return (error);
}

int
netisr_queue_src(u_int proto, uintptr_t source, struct mbuf *m)
{
#ifdef NETISR_LOCKING
    struct rm_priotracker tracker;
#endif
    u_int cpuid;
    int error;

    KASSERT(proto < NETISR_MAXPROT,
        ("%s: invalid proto %u", __func__, proto));

#ifdef NETISR_LOCKING
    NETISR_RLOCK(&tracker);
#endif
    KASSERT(netisr_proto[proto].np_handler != NULL,
        ("%s: invalid proto %u", __func__, proto));

#ifdef VIMAGE
    if (V_netisr_enable[proto] == 0) {
        m_freem(m);
        return (ENOPROTOOPT);
    }
#endif

    m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_DEFERRED,
        source, m, &cpuid);
    if (m != NULL) {
        KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__,
            cpuid));
        error = netisr_queue_internal(proto, m, cpuid);
    } else
        error = ENOBUFS;
#ifdef NETISR_LOCKING
    NETISR_RUNLOCK(&tracker);
#endif
    return (error);
}

int
netisr_queue(u_int proto, struct mbuf *m)
{

    return (netisr_queue_src(proto, 0, m));
}
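/*
 * Caller-side sketch (assumption): as implemented above, the mbuf is
 * consumed in both the success and failure cases (for example it is freed
 * when a queue is full and ENOBUFS is returned), so callers must not free
 * it themselves:
 *
 *	if (netisr_queue(NETISR_FOO, m) != 0) {
 *		(count a drop; 'm' has already been freed)
 *	}
 */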
/*
 * Dispatch a packet for netisr processing; direct dispatch is permitted by
 * calling context.
 */
int
netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m)
{
#ifdef NETISR_LOCKING
    struct rm_priotracker tracker;
#endif
    struct netisr_workstream *nwsp;
    struct netisr_proto *npp;
    struct netisr_work *npwp;
    int dosignal, error;
    u_int cpuid, dispatch_policy;

    NET_EPOCH_ASSERT();
    KASSERT(proto < NETISR_MAXPROT,
        ("%s: invalid proto %u", __func__, proto));
#ifdef NETISR_LOCKING
    NETISR_RLOCK(&tracker);
#endif
    npp = &netisr_proto[proto];
    KASSERT(npp->np_handler != NULL, ("%s: invalid proto %u", __func__,
        proto));

#ifdef VIMAGE
    if (V_netisr_enable[proto] == 0) {
        m_freem(m);
        return (ENOPROTOOPT);
    }
#endif

    dispatch_policy = netisr_get_dispatch(npp);
    if (dispatch_policy == NETISR_DISPATCH_DEFERRED)
        return (netisr_queue_src(proto, source, m));

    /*
     * If direct dispatch is forced, then unconditionally dispatch
     * without a formal CPU selection.  Borrow the current CPU's stats,
     * even if there's no worker on it.  In this case we don't update
     * nws_flags because all netisr processing will be source ordered due
     * to always being forced to directly dispatch.
     */
    if (dispatch_policy == NETISR_DISPATCH_DIRECT) {
        nwsp = DPCPU_PTR(nws);
        npwp = &nwsp->nws_work[proto];
        npwp->nw_dispatched++;
        npwp->nw_handled++;
        netisr_proto[proto].np_handler(m);
        error = 0;
        goto out_unlock;
    }

    KASSERT(dispatch_policy == NETISR_DISPATCH_HYBRID,
        ("%s: unknown dispatch policy (%u)", __func__, dispatch_policy));

    /*
     * Otherwise, we execute in a hybrid mode where we will try to direct
     * dispatch if we're on the right CPU and the netisr worker isn't
     * already running.
     */
    sched_pin();
    m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_HYBRID,
        source, m, &cpuid);
    if (m == NULL) {
        error = ENOBUFS;
        goto out_unpin;
    }
    KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));
    if (cpuid != curcpu)
        goto queue_fallback;
    nwsp = DPCPU_PTR(nws);
    npwp = &nwsp->nws_work[proto];

    /*-
     * We are willing to direct dispatch only if three conditions hold:
     *
     * (1) The netisr worker isn't already running,
     * (2) Another thread isn't already directly dispatching, and
     * (3) The netisr hasn't already been woken up.
     */
    NWS_LOCK(nwsp);
    if (nwsp->nws_flags & (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED)) {
        error = netisr_queue_workstream(nwsp, proto, npwp, m,
            &dosignal);
        NWS_UNLOCK(nwsp);
        if (dosignal)
            NWS_SIGNAL(nwsp);
        goto out_unpin;
    }

    /*
     * The current thread is now effectively the netisr worker, so set
     * the dispatching flag to prevent concurrent processing of the
     * stream from another thread (even the netisr worker), which could
     * otherwise lead to effective misordering of the stream.
     */
    nwsp->nws_flags |= NWS_DISPATCHING;
    NWS_UNLOCK(nwsp);
    netisr_proto[proto].np_handler(m);
    NWS_LOCK(nwsp);
    nwsp->nws_flags &= ~NWS_DISPATCHING;
    npwp->nw_handled++;
    npwp->nw_hybrid_dispatched++;

    /*
     * If other work was enqueued by another thread while we were direct
     * dispatching, we need to signal the netisr worker to do that work.
     * In the future, we might want to do some of that work in the
     * current thread, rather than trigger further context switches.  If
     * so, we'll want to establish a reasonable bound on the work done in
     * the "borrowed" context.
     */
    if (nwsp->nws_pendingbits != 0) {
        nwsp->nws_flags |= NWS_SCHEDULED;
        dosignal = 1;
    } else
        dosignal = 0;
    NWS_UNLOCK(nwsp);
    if (dosignal)
        NWS_SIGNAL(nwsp);
    error = 0;
    goto out_unpin;

queue_fallback:
    error = netisr_queue_internal(proto, m, cpuid);
out_unpin:
    sched_unpin();
out_unlock:
#ifdef NETISR_LOCKING
    NETISR_RUNLOCK(&tracker);
#endif
    return (error);
}

int
netisr_dispatch(u_int proto, struct mbuf *m)
{

    return (netisr_dispatch_src(proto, 0, m));
}
#ifdef DEVICE_POLLING
/*
 * Kernel polling borrows a netisr thread to run interface polling in; this
 * function allows kernel polling to request that the netisr thread be
 * scheduled even if no packets are pending for protocols.
 */
void
netisr_sched_poll(void)
{
    struct netisr_workstream *nwsp;

    nwsp = DPCPU_ID_PTR(nws_array[0], nws);
    NWS_SIGNAL(nwsp);
}
#endif

static void
netisr_start_swi(u_int cpuid, struct pcpu *pc)
{
    char swiname[12];
    struct netisr_workstream *nwsp;
    int error;

    KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));

    nwsp = DPCPU_ID_PTR(cpuid, nws);
    mtx_init(&nwsp->nws_mtx, "netisr_mtx", NULL, MTX_DEF);
    nwsp->nws_cpu = cpuid;
    snprintf(swiname, sizeof(swiname), "netisr %u", cpuid);
    error = swi_add(&nwsp->nws_intr_event, swiname, swi_net, nwsp,
        SWI_NET, INTR_TYPE_NET | INTR_MPSAFE, &nwsp->nws_swi_cookie);
    if (error)
        panic("%s: swi_add %d", __func__, error);
    pc->pc_netisr = nwsp->nws_intr_event;
    if (netisr_bindthreads) {
        error = intr_event_bind(nwsp->nws_intr_event, cpuid);
        if (error != 0)
            printf("%s: cpu %u: intr_event_bind: %d", __func__,
                cpuid, error);
    }
    NETISR_WLOCK();
    nws_array[nws_count] = nwsp->nws_cpu;
    nws_count++;
    NETISR_WUNLOCK();
}

/*
 * Initialize the netisr subsystem.  We rely on BSS and static initialization
 * of most fields in global data structures.
 *
 * Start a worker thread for the boot CPU so that we can support network
 * traffic immediately in case the network stack is used before additional
 * CPUs are started (for example, diskless boot).
 */
static void
netisr_init(void *arg)
{
    struct pcpu *pc;

    NETISR_LOCK_INIT();
    if (netisr_maxthreads == 0 || netisr_maxthreads < -1)
        netisr_maxthreads = 1;		/* default behavior */
    else if (netisr_maxthreads == -1)
        netisr_maxthreads = mp_ncpus;	/* use max cpus */
    if (netisr_maxthreads > mp_ncpus) {
        printf("netisr_init: forcing maxthreads from %d to %d\n",
            netisr_maxthreads, mp_ncpus);
        netisr_maxthreads = mp_ncpus;
    }
    if (netisr_defaultqlimit > netisr_maxqlimit) {
        printf("netisr_init: forcing defaultqlimit from %d to %d\n",
            netisr_defaultqlimit, netisr_maxqlimit);
        netisr_defaultqlimit = netisr_maxqlimit;
    }
#ifdef DEVICE_POLLING
    /*
     * The device polling code is not yet aware of how to deal with
     * multiple netisr threads, so for the time being compiling in device
     * polling disables parallel netisr workers.
     */
    if (netisr_maxthreads != 1 || netisr_bindthreads != 0) {
        printf("netisr_init: forcing maxthreads to 1 and "
            "bindthreads to 0 for device polling\n");
        netisr_maxthreads = 1;
        netisr_bindthreads = 0;
    }
#endif

#ifdef EARLY_AP_STARTUP
    STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
        if (nws_count >= netisr_maxthreads)
            break;
        netisr_start_swi(pc->pc_cpuid, pc);
    }
#else
    pc = get_pcpu();
    netisr_start_swi(pc->pc_cpuid, pc);
#endif
}
SYSINIT(netisr_init, SI_SUB_SOFTINTR, SI_ORDER_FIRST, netisr_init, NULL);
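/*
 * Configuration sketch (usage assumption): the thread-count policy applied
 * above is driven by loader tunables, so a machine could be configured to
 * run one bound netisr thread per CPU with, for example:
 *
 *	net.isr.maxthreads="-1"		use all CPUs (mp_ncpus)
 *	net.isr.bindthreads="1"		bind each thread to its CPU
 *
 * in /boot/loader.conf; both are read-only after boot (CTLFLAG_RDTUN).
 */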
#ifndef EARLY_AP_STARTUP
/*
 * Start worker threads for additional CPUs.  No attempt is made to
 * gracefully handle work reassignment, as we don't yet support dynamic
 * reconfiguration.
 */
static void
netisr_start(void *arg)
{
    struct pcpu *pc;

    STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
        if (nws_count >= netisr_maxthreads)
            break;
        /* Worker will already be present for boot CPU. */
        if (pc->pc_netisr != NULL)
            continue;
        netisr_start_swi(pc->pc_cpuid, pc);
    }
}
SYSINIT(netisr_start, SI_SUB_SMP, SI_ORDER_MIDDLE, netisr_start, NULL);
#endif

/*
 * Sysctl monitoring for netisr: query a list of registered protocols.
 */
static int
sysctl_netisr_proto(SYSCTL_HANDLER_ARGS)
{
    struct rm_priotracker tracker;
    struct sysctl_netisr_proto *snpp, *snp_array;
    struct netisr_proto *npp;
    u_int counter, proto;
    int error;

    if (req->newptr != NULL)
        return (EINVAL);
    snp_array = malloc(sizeof(*snp_array) * NETISR_MAXPROT, M_TEMP,
        M_ZERO | M_WAITOK);
    counter = 0;
    NETISR_RLOCK(&tracker);
    for (proto = 0; proto < NETISR_MAXPROT; proto++) {
        npp = &netisr_proto[proto];
        if (npp->np_name == NULL)
            continue;
        snpp = &snp_array[counter];
        snpp->snp_version = sizeof(*snpp);
        strlcpy(snpp->snp_name, npp->np_name, NETISR_NAMEMAXLEN);
        snpp->snp_proto = proto;
        snpp->snp_qlimit = npp->np_qlimit;
        snpp->snp_policy = npp->np_policy;
        snpp->snp_dispatch = npp->np_dispatch;
        if (npp->np_m2flow != NULL)
            snpp->snp_flags |= NETISR_SNP_FLAGS_M2FLOW;
        if (npp->np_m2cpuid != NULL)
            snpp->snp_flags |= NETISR_SNP_FLAGS_M2CPUID;
        if (npp->np_drainedcpu != NULL)
            snpp->snp_flags |= NETISR_SNP_FLAGS_DRAINEDCPU;
        counter++;
    }
    NETISR_RUNLOCK(&tracker);
    KASSERT(counter <= NETISR_MAXPROT,
        ("sysctl_netisr_proto: counter too big (%d)", counter));
    error = SYSCTL_OUT(req, snp_array, sizeof(*snp_array) * counter);
    free(snp_array, M_TEMP);
    return (error);
}

SYSCTL_PROC(_net_isr, OID_AUTO, proto,
    CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_proto,
    "S,sysctl_netisr_proto",
    "Return list of protocols registered with netisr");
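/*
 * Userspace sketch (assumption, not part of the original file): monitoring
 * tools can read the array of struct sysctl_netisr_proto exported above via
 * sysctlbyname(3), with the entry count following from the returned length:
 *
 *	struct sysctl_netisr_proto *snpp;
 *	size_t len;
 *
 *	sysctlbyname("net.isr.proto", NULL, &len, NULL, 0);
 *	snpp = malloc(len);
 *	sysctlbyname("net.isr.proto", snpp, &len, NULL, 0);
 *	(len / sizeof(*snpp) entries are valid)
 */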
/*
 * Sysctl monitoring for netisr: query a list of workstreams.
 */
static int
sysctl_netisr_workstream(SYSCTL_HANDLER_ARGS)
{
    struct rm_priotracker tracker;
    struct sysctl_netisr_workstream *snwsp, *snws_array;
    struct netisr_workstream *nwsp;
    u_int counter, cpuid;
    int error;

    if (req->newptr != NULL)
        return (EINVAL);
    snws_array = malloc(sizeof(*snws_array) * MAXCPU, M_TEMP,
        M_ZERO | M_WAITOK);
    counter = 0;
    NETISR_RLOCK(&tracker);
    CPU_FOREACH(cpuid) {
        nwsp = DPCPU_ID_PTR(cpuid, nws);
        if (nwsp->nws_intr_event == NULL)
            continue;
        NWS_LOCK(nwsp);
        snwsp = &snws_array[counter];
        snwsp->snws_version = sizeof(*snwsp);

        /*
         * For now, we equate workstream IDs and CPU IDs in the
         * kernel, but expose them independently to userspace in case
         * that assumption changes in the future.
         */
        snwsp->snws_wsid = cpuid;
        snwsp->snws_cpu = cpuid;
        if (nwsp->nws_intr_event != NULL)
            snwsp->snws_flags |= NETISR_SNWS_FLAGS_INTR;
        NWS_UNLOCK(nwsp);
        counter++;
    }
    NETISR_RUNLOCK(&tracker);
    KASSERT(counter <= MAXCPU,
        ("sysctl_netisr_workstream: counter too big (%d)", counter));
    error = SYSCTL_OUT(req, snws_array, sizeof(*snws_array) * counter);
    free(snws_array, M_TEMP);
    return (error);
}

SYSCTL_PROC(_net_isr, OID_AUTO, workstream,
    CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_workstream,
    "S,sysctl_netisr_workstream",
    "Return list of workstreams implemented by netisr");

/*
 * Sysctl monitoring for netisr: query per-protocol data across all
 * workstreams.
 */
static int
sysctl_netisr_work(SYSCTL_HANDLER_ARGS)
{
    struct rm_priotracker tracker;
    struct sysctl_netisr_work *snwp, *snw_array;
    struct netisr_workstream *nwsp;
    struct netisr_proto *npp;
    struct netisr_work *nwp;
    u_int counter, cpuid, proto;
    int error;

    if (req->newptr != NULL)
        return (EINVAL);
    snw_array = malloc(sizeof(*snw_array) * MAXCPU * NETISR_MAXPROT,
        M_TEMP, M_ZERO | M_WAITOK);
    counter = 0;
    NETISR_RLOCK(&tracker);
    CPU_FOREACH(cpuid) {
        nwsp = DPCPU_ID_PTR(cpuid, nws);
        if (nwsp->nws_intr_event == NULL)
            continue;
        NWS_LOCK(nwsp);
        for (proto = 0; proto < NETISR_MAXPROT; proto++) {
            npp = &netisr_proto[proto];
            if (npp->np_name == NULL)
                continue;
            nwp = &nwsp->nws_work[proto];
            snwp = &snw_array[counter];
            snwp->snw_version = sizeof(*snwp);
            snwp->snw_wsid = cpuid;		/* See comment above. */
            snwp->snw_proto = proto;
            snwp->snw_len = nwp->nw_len;
            snwp->snw_watermark = nwp->nw_watermark;
            snwp->snw_dispatched = nwp->nw_dispatched;
            snwp->snw_hybrid_dispatched =
                nwp->nw_hybrid_dispatched;
            snwp->snw_qdrops = nwp->nw_qdrops;
            snwp->snw_queued = nwp->nw_queued;
            snwp->snw_handled = nwp->nw_handled;
            counter++;
        }
        NWS_UNLOCK(nwsp);
    }
    KASSERT(counter <= MAXCPU * NETISR_MAXPROT,
        ("sysctl_netisr_work: counter too big (%d)", counter));
    NETISR_RUNLOCK(&tracker);
    error = SYSCTL_OUT(req, snw_array, sizeof(*snw_array) * counter);
    free(snw_array, M_TEMP);
    return (error);
}

SYSCTL_PROC(_net_isr, OID_AUTO, work,
    CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_work,
    "S,sysctl_netisr_work",
    "Return list of per-workstream, per-protocol work in netisr");

#ifdef DDB
DB_SHOW_COMMAND(netisr, db_show_netisr)
{
    struct netisr_workstream *nwsp;
    struct netisr_work *nwp;
    int first, proto;
    u_int cpuid;

    db_printf("%3s %6s %5s %5s %5s %8s %8s %8s %8s\n", "CPU", "Proto",
        "Len", "WMark", "Max", "Disp", "HDisp", "Drop", "Queue");
    CPU_FOREACH(cpuid) {
        nwsp = DPCPU_ID_PTR(cpuid, nws);
        if (nwsp->nws_intr_event == NULL)
            continue;
        first = 1;
        for (proto = 0; proto < NETISR_MAXPROT; proto++) {
            if (netisr_proto[proto].np_handler == NULL)
                continue;
            nwp = &nwsp->nws_work[proto];
            if (first) {
                db_printf("%3d ", cpuid);
                first = 0;
            } else
                db_printf("%3s ", "");
            db_printf(
                "%6s %5d %5d %5d %8ju %8ju %8ju %8ju\n",
                netisr_proto[proto].np_name, nwp->nw_len,
                nwp->nw_watermark, nwp->nw_qlimit,
                nwp->nw_dispatched, nwp->nw_hybrid_dispatched,
                nwp->nw_qdrops, nwp->nw_queued);
        }
    }
}
#endif