/*-
 * Copyright (c) 2007-2009 Robert N. M. Watson
 * Copyright (c) 2010-2011 Juniper Networks, Inc.
 * All rights reserved.
 *
 * This software was developed by Robert N. M. Watson under contract
 * to Juniper Networks, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/*
 * netisr is a packet dispatch service, allowing synchronous (directly
 * dispatched) and asynchronous (deferred dispatch) processing of packets by
 * registered protocol handlers.  Callers pass a protocol identifier and
 * packet to netisr, along with a direct dispatch hint, and work will either
 * be immediately processed by the registered handler, or passed to a
 * software interrupt (SWI) thread for deferred dispatch.  Callers will
 * generally select one or the other based on:
 *
 * - Whether directly dispatching a netisr handler leads to code reentrance or
 *   lock recursion, such as entering the socket code from the socket code.
 * - Whether directly dispatching a netisr handler leads to recursive
 *   processing, such as when decapsulating several wrapped layers of tunnel
 *   information (IPSEC within IPSEC within ...).
 *
 * Maintaining ordering for protocol streams is a critical design concern.
 * Enforcing ordering limits the opportunity for concurrency, but maintains
 * the strong ordering requirements found in some protocols, such as TCP.  Of
 * related concern is CPU affinity--it is desirable to process all data
 * associated with a particular stream on the same CPU over time in order to
 * avoid acquiring locks associated with the connection on different CPUs,
 * keep connection data in one cache, and to generally encourage associated
 * user threads to live on the same CPU as the stream.  It's also desirable
 * to avoid lock migration and contention where locks are associated with
 * more than one flow.
 *
 * netisr supports several policy variations, represented by the
 * NETISR_POLICY_* constants, allowing protocols to play various roles in
 * identifying flows, assigning work to CPUs, etc.  These are described in
 * netisr.h.
 */
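
/*
 * Illustrative sketch (not compiled): a protocol's input path hands each
 * received packet to netisr together with its protocol identifier.  The
 * protocol number NETISR_FOO below is hypothetical; real identifiers are
 * defined in netisr.h.
 *
 *	netisr_dispatch(NETISR_FOO, m);	   May run the handler directly,
 *					   subject to the dispatch policy.
 *	netisr_queue(NETISR_FOO, m);	   Always defers to the SWI thread.
 *
 * Both calls consume the mbuf on success and on failure.
 */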

#include "opt_ddb.h"
#include "opt_device_polling.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/interrupt.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/rmlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#define	_WANT_NETISR_INTERNAL	/* Enable definitions from netisr_internal.h */
#include <net/if.h>
#include <net/if_var.h>
#include <net/netisr.h>
#include <net/netisr_internal.h>
#include <net/vnet.h>

/*-
 * Synchronize use and modification of the registered netisr data structures;
 * acquire a write lock while modifying the set of registered protocols to
 * prevent partially registered or unregistered protocols from being run.
 *
 * The following data structures and fields are protected by this lock:
 *
 * - The netisr_proto array, including all fields of struct netisr_proto.
 * - The nws array, including all fields of struct netisr_worker.
 * - The nws_array array.
 *
 * Note: the NETISR_LOCKING define controls whether read locks are acquired
 * in packet processing paths requiring netisr registration stability.  This
 * is disabled by default as it can lead to measurable performance
 * degradation even with rmlocks (3%-6% for loopback ping-pong traffic), and
 * because netisr registration and unregistration is extremely rare at
 * runtime.  If it becomes more common, this decision should be revisited.
 *
 * XXXRW: rmlocks don't support assertions.
 */
static struct rmlock	netisr_rmlock;
#define	NETISR_LOCK_INIT()	rm_init_flags(&netisr_rmlock, "netisr", \
				    RM_NOWITNESS)
#define	NETISR_LOCK_ASSERT()
#define	NETISR_RLOCK(tracker)	rm_rlock(&netisr_rmlock, (tracker))
#define	NETISR_RUNLOCK(tracker)	rm_runlock(&netisr_rmlock, (tracker))
#define	NETISR_WLOCK()		rm_wlock(&netisr_rmlock)
#define	NETISR_WUNLOCK()	rm_wunlock(&netisr_rmlock)
/* #define	NETISR_LOCKING */

static SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr");

/*-
 * Three global direct dispatch policies are supported:
 *
 * NETISR_DISPATCH_DEFERRED: All work is deferred for a netisr, regardless of
 * context (may be overridden by protocols).
 *
 * NETISR_DISPATCH_HYBRID: If the executing context allows direct dispatch,
 * and we're running on the CPU the work would be performed on, then direct
 * dispatch it if it wouldn't violate ordering constraints on the workstream.
 *
 * NETISR_DISPATCH_DIRECT: If the executing context allows direct dispatch,
 * always direct dispatch.  (The default.)
 *
 * Notice that changing the global policy could lead to short periods of
 * misordered processing, but this is considered acceptable as compared to
 * the complexity of enforcing ordering during policy changes.  Protocols can
 * override the global policy (when they're not doing that, they select
 * NETISR_DISPATCH_DEFAULT).
 */
#define	NETISR_DISPATCH_POLICY_DEFAULT	NETISR_DISPATCH_DIRECT
#define	NETISR_DISPATCH_POLICY_MAXSTR	20 /* Used for temporary buffers. */
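
/*
 * The global policy is selected with the net.isr.dispatch tunable/sysctl
 * declared below; for example (illustrative):
 *
 *	net.isr.dispatch="deferred"		in loader.conf, or
 *	# sysctl net.isr.dispatch=hybrid	at runtime.
 *
 * Accepted values are "deferred", "hybrid" and "direct"; "default" is
 * reserved for protocols that defer to the global policy and is rejected
 * here.
 */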

static u_int	netisr_dispatch_policy = NETISR_DISPATCH_POLICY_DEFAULT;
static int	sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_net_isr, OID_AUTO, dispatch, CTLTYPE_STRING | CTLFLAG_RWTUN,
    0, 0, sysctl_netisr_dispatch_policy, "A",
    "netisr dispatch policy");

/*
 * Allow the administrator to limit the number of threads (CPUs) used for
 * netisr processing.  The limit is not checked before the thread for CPU 0
 * is created, and it must be set as a boot-time tunable.  At most one
 * thread, and therefore one workstream, is created per CPU.  The default of
 * 1 assigns only CPU 0; a value of -1 uses all CPUs (mp_ncpus), giving one
 * workstream per CPU.
 */
static int	netisr_maxthreads = 1;		/* Max number of threads. */
SYSCTL_INT(_net_isr, OID_AUTO, maxthreads, CTLFLAG_RDTUN,
    &netisr_maxthreads, 0,
    "Use at most this many CPUs for netisr processing");

static int	netisr_bindthreads = 0;		/* Bind threads to CPUs. */
SYSCTL_INT(_net_isr, OID_AUTO, bindthreads, CTLFLAG_RDTUN,
    &netisr_bindthreads, 0, "Bind netisr threads to CPUs.");

/*
 * Limit per-workstream mbuf queue limits to at most net.isr.maxqlimit,
 * both for initial configuration and later modification using
 * netisr_setqlimit().
 */
#define	NETISR_DEFAULT_MAXQLIMIT	10240
static u_int	netisr_maxqlimit = NETISR_DEFAULT_MAXQLIMIT;
SYSCTL_UINT(_net_isr, OID_AUTO, maxqlimit, CTLFLAG_RDTUN,
    &netisr_maxqlimit, 0,
    "Maximum netisr per-protocol, per-CPU queue depth.");

/*
 * The default per-workstream mbuf queue limit for protocols that don't
 * initialize the nh_qlimit field of their struct netisr_handler.  If this is
 * set above netisr_maxqlimit, we truncate it to the maximum during boot.
 */
#define	NETISR_DEFAULT_DEFAULTQLIMIT	256
static u_int	netisr_defaultqlimit = NETISR_DEFAULT_DEFAULTQLIMIT;
SYSCTL_UINT(_net_isr, OID_AUTO, defaultqlimit, CTLFLAG_RDTUN,
    &netisr_defaultqlimit, 0,
    "Default netisr per-protocol, per-CPU queue limit if not set by protocol");
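
/*
 * Example boot-time configuration (illustrative): the thread and queue
 * tunables above are read-only at runtime (CTLFLAG_RDTUN) and are normally
 * set from loader.conf, e.g.:
 *
 *	net.isr.maxthreads=-1		one workstream per CPU
 *	net.isr.bindthreads=1		pin each worker to its CPU
 *	net.isr.defaultqlimit=1024	deeper default per-CPU queues
 *
 * net.isr.maxqlimit caps both the default limit and any later
 * netisr_setqlimit() request.
 */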

/*
 * Store and export the compile-time constant NETISR_MAXPROT limit on the
 * number of protocols that can register with netisr at a time.  This is
 * required for crashdump analysis, as it sizes netisr_proto[].
 */
static u_int	netisr_maxprot = NETISR_MAXPROT;
SYSCTL_UINT(_net_isr, OID_AUTO, maxprot, CTLFLAG_RD,
    &netisr_maxprot, 0,
    "Compile-time limit on the number of protocols supported by netisr.");

/*
 * The netisr_proto array describes all registered protocols, indexed by
 * protocol number.  See netisr_internal.h for more details.
 */
static struct netisr_proto	netisr_proto[NETISR_MAXPROT];

#ifdef VIMAGE
/*
 * The netisr_enable array holds a per-VNET flag indicating whether each
 * registered protocol is active in that VNET.  netisr_register() will
 * automatically enable the netisr for the default VNET and all currently
 * active instances.  netisr_unregister() will disable all active VNETs,
 * including vnet0.  Individual network stack instances can be
 * enabled/disabled by the netisr_(un)register_vnet() functions.
 * With this we keep the one netisr_proto per protocol but add a
 * mechanism to stop netisr processing for vnet teardown.
 * Apart from that we expect a VNET to always be enabled.
 */
static VNET_DEFINE(u_int, netisr_enable[NETISR_MAXPROT]);
#define	V_netisr_enable		VNET(netisr_enable)
#endif

/*
 * Per-CPU workstream data.  See netisr_internal.h for more details.
 */
DPCPU_DEFINE(struct netisr_workstream, nws);

/*
 * Map contiguous values between 0 and nws_count into CPU IDs appropriate for
 * accessing workstreams.  This allows constructions of the form
 * DPCPU_ID_GET(nws_array[arbitraryvalue % nws_count], nws).
 */
static u_int	nws_array[MAXCPU];

/*
 * Number of registered workstreams.  Will be at most the number of running
 * CPUs once fully started.
 */
static u_int	nws_count;
SYSCTL_UINT(_net_isr, OID_AUTO, numthreads, CTLFLAG_RD,
    &nws_count, 0, "Number of extant netisr threads.");

/*
 * Synchronization for each workstream: a mutex protects all mutable fields
 * in each stream, including per-protocol state (mbuf queues).  The SWI is
 * woken up if asynchronous dispatch is required.
 */
#define	NWS_LOCK(s)		mtx_lock(&(s)->nws_mtx)
#define	NWS_LOCK_ASSERT(s)	mtx_assert(&(s)->nws_mtx, MA_OWNED)
#define	NWS_UNLOCK(s)		mtx_unlock(&(s)->nws_mtx)
#define	NWS_SIGNAL(s)		swi_sched((s)->nws_swi_cookie, 0)

/*
 * Utility routines for protocols that implement their own mapping of flows
 * to CPUs.
 */
u_int
netisr_get_cpucount(void)
{

	return (nws_count);
}

u_int
netisr_get_cpuid(u_int cpunumber)
{

	KASSERT(cpunumber < nws_count, ("%s: %u >= %u", __func__, cpunumber,
	    nws_count));

	return (nws_array[cpunumber]);
}

/*
 * The default implementation of flow -> CPU ID mapping.
 *
 * Non-static so that protocols can use it to map their own work to specific
 * CPUs in a manner consistent to netisr for affinity purposes.
 */
u_int
netisr_default_flow2cpu(u_int flowid)
{

	return (nws_array[flowid % nws_count]);
}
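
/*
 * Illustrative sketch (not compiled): a protocol distributing its own
 * per-flow state, for example timers or caches, can size and index its
 * structures consistently with netisr using the helpers above:
 *
 *	n = netisr_get_cpucount();
 *	cpu = netisr_get_cpuid(flowid % n);
 *	cpu = netisr_default_flow2cpu(flowid);	(equivalent shorthand)
 *
 * Both forms map a flow ID onto the same CPU that netisr itself would pick
 * for NETISR_POLICY_FLOW work, preserving affinity between the two.
 */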

/*
 * Dispatch tunable and sysctl configuration.
 */
struct netisr_dispatch_table_entry {
	u_int		 ndte_policy;
	const char	*ndte_policy_str;
};
static const struct netisr_dispatch_table_entry netisr_dispatch_table[] = {
	{ NETISR_DISPATCH_DEFAULT, "default" },
	{ NETISR_DISPATCH_DEFERRED, "deferred" },
	{ NETISR_DISPATCH_HYBRID, "hybrid" },
	{ NETISR_DISPATCH_DIRECT, "direct" },
};

static void
netisr_dispatch_policy_to_str(u_int dispatch_policy, char *buffer,
    u_int buflen)
{
	const struct netisr_dispatch_table_entry *ndtep;
	const char *str;
	u_int i;

	str = "unknown";
	for (i = 0; i < nitems(netisr_dispatch_table); i++) {
		ndtep = &netisr_dispatch_table[i];
		if (ndtep->ndte_policy == dispatch_policy) {
			str = ndtep->ndte_policy_str;
			break;
		}
	}
	snprintf(buffer, buflen, "%s", str);
}

static int
netisr_dispatch_policy_from_str(const char *str, u_int *dispatch_policyp)
{
	const struct netisr_dispatch_table_entry *ndtep;
	u_int i;

	for (i = 0; i < nitems(netisr_dispatch_table); i++) {
		ndtep = &netisr_dispatch_table[i];
		if (strcmp(ndtep->ndte_policy_str, str) == 0) {
			*dispatch_policyp = ndtep->ndte_policy;
			return (0);
		}
	}
	return (EINVAL);
}

static int
sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS)
{
	char tmp[NETISR_DISPATCH_POLICY_MAXSTR];
	u_int dispatch_policy;
	int error;

	netisr_dispatch_policy_to_str(netisr_dispatch_policy, tmp,
	    sizeof(tmp));
	error = sysctl_handle_string(oidp, tmp, sizeof(tmp), req);
	if (error == 0 && req->newptr != NULL) {
		error = netisr_dispatch_policy_from_str(tmp,
		    &dispatch_policy);
		if (error == 0 && dispatch_policy == NETISR_DISPATCH_DEFAULT)
			error = EINVAL;
		if (error == 0)
			netisr_dispatch_policy = dispatch_policy;
	}
	return (error);
}

/*
 * Register a new netisr handler, which requires initializing per-protocol
 * fields for each workstream.  All netisr work is briefly suspended while
 * the protocol is installed.
 */
void
netisr_register(const struct netisr_handler *nhp)
{
	VNET_ITERATOR_DECL(vnet_iter);
	struct netisr_work *npwp;
	const char *name;
	u_int i, proto;

	proto = nhp->nh_proto;
	name = nhp->nh_name;

	/*
	 * Test that the requested registration is valid.
	 */
	KASSERT(nhp->nh_name != NULL,
	    ("%s: nh_name NULL for %u", __func__, proto));
	KASSERT(nhp->nh_handler != NULL,
	    ("%s: nh_handler NULL for %s", __func__, name));
	KASSERT(nhp->nh_policy == NETISR_POLICY_SOURCE ||
	    nhp->nh_policy == NETISR_POLICY_FLOW ||
	    nhp->nh_policy == NETISR_POLICY_CPU,
	    ("%s: unsupported nh_policy %u for %s", __func__,
	    nhp->nh_policy, name));
	KASSERT(nhp->nh_policy == NETISR_POLICY_FLOW ||
	    nhp->nh_m2flow == NULL,
	    ("%s: nh_policy != FLOW but m2flow defined for %s", __func__,
	    name));
	KASSERT(nhp->nh_policy == NETISR_POLICY_CPU || nhp->nh_m2cpuid == NULL,
	    ("%s: nh_policy != CPU but m2cpuid defined for %s", __func__,
	    name));
	KASSERT(nhp->nh_policy != NETISR_POLICY_CPU || nhp->nh_m2cpuid != NULL,
	    ("%s: nh_policy == CPU but m2cpuid not defined for %s", __func__,
	    name));
	KASSERT(nhp->nh_dispatch == NETISR_DISPATCH_DEFAULT ||
	    nhp->nh_dispatch == NETISR_DISPATCH_DEFERRED ||
	    nhp->nh_dispatch == NETISR_DISPATCH_HYBRID ||
	    nhp->nh_dispatch == NETISR_DISPATCH_DIRECT,
	    ("%s: invalid nh_dispatch (%u)", __func__, nhp->nh_dispatch));

	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u, %s): protocol too big", __func__, proto, name));

	/*
	 * Test that no existing registration exists for this protocol.
	 */
	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_name == NULL,
	    ("%s(%u, %s): name present", __func__, proto, name));
	KASSERT(netisr_proto[proto].np_handler == NULL,
	    ("%s(%u, %s): handler present", __func__, proto, name));

	netisr_proto[proto].np_name = name;
	netisr_proto[proto].np_handler = nhp->nh_handler;
	netisr_proto[proto].np_m2flow = nhp->nh_m2flow;
	netisr_proto[proto].np_m2cpuid = nhp->nh_m2cpuid;
	netisr_proto[proto].np_drainedcpu = nhp->nh_drainedcpu;
	if (nhp->nh_qlimit == 0)
		netisr_proto[proto].np_qlimit = netisr_defaultqlimit;
	else if (nhp->nh_qlimit > netisr_maxqlimit) {
		printf("%s: %s requested queue limit %u capped to "
		    "net.isr.maxqlimit %u\n", __func__, name, nhp->nh_qlimit,
		    netisr_maxqlimit);
		netisr_proto[proto].np_qlimit = netisr_maxqlimit;
	} else
		netisr_proto[proto].np_qlimit = nhp->nh_qlimit;
	netisr_proto[proto].np_policy = nhp->nh_policy;
	netisr_proto[proto].np_dispatch = nhp->nh_dispatch;
	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		bzero(npwp, sizeof(*npwp));
		npwp->nw_qlimit = netisr_proto[proto].np_qlimit;
	}

#ifdef VIMAGE
	/*
	 * Test that we are in vnet0 and have a curvnet set.
	 */
	KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__));
	KASSERT(IS_DEFAULT_VNET(curvnet), ("%s: curvnet %p is not vnet0 %p",
	    __func__, curvnet, vnet0));
	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		V_netisr_enable[proto] = 1;
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
#endif
	NETISR_WUNLOCK();
}
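
/*
 * Illustrative sketch (not compiled): a typical registration for a
 * hypothetical "foo" protocol using per-flow work placement and the global
 * dispatch policy; registrations of this shape appear in the IP and ARP
 * input paths.
 *
 *	static void foo_input(struct mbuf *m);
 *
 *	static struct netisr_handler foo_nh = {
 *		.nh_name = "foo",
 *		.nh_handler = foo_input,
 *		.nh_proto = NETISR_FOO,		(hypothetical constant)
 *		.nh_policy = NETISR_POLICY_FLOW,
 *		.nh_dispatch = NETISR_DISPATCH_DEFAULT,
 *	};
 *
 *	netisr_register(&foo_nh);
 *
 * Leaving nh_qlimit at zero selects netisr_defaultqlimit; nh_m2flow may be
 * provided when packets do not arrive with a usable flow ID.
 */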

/*
 * Clear drop counters across all workstreams for a protocol.
 */
void
netisr_clearqdrops(const struct netisr_handler *nhp)
{
	struct netisr_work *npwp;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		npwp->nw_qdrops = 0;
	}
	NETISR_WUNLOCK();
}

/*
 * Query current drop counters across all workstreams for a protocol.
 */
void
netisr_getqdrops(const struct netisr_handler *nhp, u_int64_t *qdropp)
{
	struct netisr_work *npwp;
	struct rm_priotracker tracker;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	*qdropp = 0;
	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_RLOCK(&tracker);
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		*qdropp += npwp->nw_qdrops;
	}
	NETISR_RUNLOCK(&tracker);
}

/*
 * Query current per-workstream queue limit for a protocol.
 */
void
netisr_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp)
{
	struct rm_priotracker tracker;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int proto;

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_RLOCK(&tracker);
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));
	*qlimitp = netisr_proto[proto].np_qlimit;
	NETISR_RUNLOCK(&tracker);
}

/*
 * Update the queue limit across per-workstream queues for a protocol.  We
 * simply change the limits, and don't drain overflowed packets as they will
 * (hopefully) take care of themselves shortly.
 */
int
netisr_setqlimit(const struct netisr_handler *nhp, u_int qlimit)
{
	struct netisr_work *npwp;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	if (qlimit > netisr_maxqlimit)
		return (EINVAL);

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	netisr_proto[proto].np_qlimit = qlimit;
	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		npwp->nw_qlimit = qlimit;
	}
	NETISR_WUNLOCK();
	return (0);
}

/*
 * Drain all packets currently held in a particular protocol work queue.
 */
static void
netisr_drain_proto(struct netisr_work *npwp)
{
	struct mbuf *m;

	/*
	 * We would assert the lock on the workstream but it's not passed in.
	 */
	while ((m = npwp->nw_head) != NULL) {
		npwp->nw_head = m->m_nextpkt;
		m->m_nextpkt = NULL;
		if (npwp->nw_head == NULL)
			npwp->nw_tail = NULL;
		npwp->nw_len--;
		m_freem(m);
	}
	KASSERT(npwp->nw_tail == NULL, ("%s: tail", __func__));
	KASSERT(npwp->nw_len == 0, ("%s: len", __func__));
}

/*
 * Remove the registration of a network protocol, which requires clearing
 * per-protocol fields across all workstreams, including freeing all mbufs in
 * the queues at time of unregister.  All work in netisr is briefly suspended
 * while this takes place.
 */
void
netisr_unregister(const struct netisr_handler *nhp)
{
	VNET_ITERATOR_DECL(vnet_iter);
	struct netisr_work *npwp;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

#ifdef VIMAGE
	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		V_netisr_enable[proto] = 0;
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
#endif

	netisr_proto[proto].np_name = NULL;
	netisr_proto[proto].np_handler = NULL;
	netisr_proto[proto].np_m2flow = NULL;
	netisr_proto[proto].np_m2cpuid = NULL;
	netisr_proto[proto].np_qlimit = 0;
	netisr_proto[proto].np_policy = 0;
	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		netisr_drain_proto(npwp);
		bzero(npwp, sizeof(*npwp));
	}
	NETISR_WUNLOCK();
}

#ifdef VIMAGE
void
netisr_register_vnet(const struct netisr_handler *nhp)
{
	u_int proto;

	proto = nhp->nh_proto;

	KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__));
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, nhp->nh_name));
	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    nhp->nh_name));

	V_netisr_enable[proto] = 1;
	NETISR_WUNLOCK();
}

static void
netisr_drain_proto_vnet(struct vnet *vnet, u_int proto)
{
	struct netisr_workstream *nwsp;
	struct netisr_work *npwp;
	struct mbuf *m, *mp, *n, *ne;
	u_int i;

	KASSERT(vnet != NULL, ("%s: vnet is NULL", __func__));
	NETISR_LOCK_ASSERT();

	CPU_FOREACH(i) {
		nwsp = DPCPU_ID_PTR(i, nws);
		if (nwsp->nws_intr_event == NULL)
			continue;
		npwp = &nwsp->nws_work[proto];
		NWS_LOCK(nwsp);

		/*
		 * Rather than dissecting and removing mbufs from the middle
		 * of the chain, we build a new chain if the packet stays and
		 * update the head and tail pointers at the end.  All packets
		 * matching the given vnet are freed.
		 */
		m = npwp->nw_head;
		n = ne = NULL;
		while (m != NULL) {
			mp = m;
			m = m->m_nextpkt;
			mp->m_nextpkt = NULL;
			if (mp->m_pkthdr.rcvif->if_vnet != vnet) {
				if (n == NULL) {
					n = ne = mp;
				} else {
					ne->m_nextpkt = mp;
					ne = mp;
				}
				continue;
			}
			/* This is a packet in the selected vnet.  Free it. */
			npwp->nw_len--;
			m_freem(mp);
		}
		npwp->nw_head = n;
		npwp->nw_tail = ne;
		NWS_UNLOCK(nwsp);
	}
}

void
netisr_unregister_vnet(const struct netisr_handler *nhp)
{
	u_int proto;

	proto = nhp->nh_proto;

	KASSERT(curvnet != NULL, ("%s: curvnet is NULL", __func__));
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, nhp->nh_name));
	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    nhp->nh_name));

	V_netisr_enable[proto] = 0;

	netisr_drain_proto_vnet(curvnet, proto);
	NETISR_WUNLOCK();
}
#endif

/*
 * Compose the global and per-protocol policies on dispatch, and return the
 * dispatch policy to use.
 */
static u_int
netisr_get_dispatch(struct netisr_proto *npp)
{

	/*
	 * Protocol-specific configuration overrides the global default.
	 */
	if (npp->np_dispatch != NETISR_DISPATCH_DEFAULT)
		return (npp->np_dispatch);
	return (netisr_dispatch_policy);
}

/*
 * Look up the workstream given a packet and source identifier.  Do this by
 * checking the protocol's policy, and optionally call out to the protocol
 * for assistance if required.
 */
static struct mbuf *
netisr_select_cpuid(struct netisr_proto *npp, u_int dispatch_policy,
    uintptr_t source, struct mbuf *m, u_int *cpuidp)
{
	struct ifnet *ifp;
	u_int policy;

	NETISR_LOCK_ASSERT();

	/*
	 * In the event we have only one worker, shortcut and deliver to it
	 * without further ado.
	 */
	if (nws_count == 1) {
		*cpuidp = nws_array[0];
		return (m);
	}

	/*
	 * What happens next depends on the policy selected by the protocol.
	 * If we want to support per-interface policies, we should do that
	 * here first.
	 */
	policy = npp->np_policy;
	if (policy == NETISR_POLICY_CPU) {
		m = npp->np_m2cpuid(m, source, cpuidp);
		if (m == NULL)
			return (NULL);

		/*
		 * It's possible for a protocol not to have a good idea about
		 * where to process a packet, in which case we fall back on
		 * the netisr code to decide.  In the hybrid case, return the
		 * current CPU ID, which will force an immediate direct
		 * dispatch.  In the queued case, fall back on the SOURCE
		 * policy.
		 */
		if (*cpuidp != NETISR_CPUID_NONE)
			return (m);
		if (dispatch_policy == NETISR_DISPATCH_HYBRID) {
			*cpuidp = curcpu;
			return (m);
		}
		policy = NETISR_POLICY_SOURCE;
	}

	if (policy == NETISR_POLICY_FLOW) {
		if (M_HASHTYPE_GET(m) == M_HASHTYPE_NONE &&
		    npp->np_m2flow != NULL) {
			m = npp->np_m2flow(m, source);
			if (m == NULL)
				return (NULL);
		}
		if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
			*cpuidp =
			    netisr_default_flow2cpu(m->m_pkthdr.flowid);
			return (m);
		}
		policy = NETISR_POLICY_SOURCE;
	}

	KASSERT(policy == NETISR_POLICY_SOURCE,
	    ("%s: invalid policy %u for %s", __func__, npp->np_policy,
	    npp->np_name));

	ifp = m->m_pkthdr.rcvif;
	if (ifp != NULL)
		*cpuidp = nws_array[(ifp->if_index + source) % nws_count];
	else
		*cpuidp = nws_array[source % nws_count];
	return (m);
}
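
/*
 * Worked example of the fallback order above (illustrative): with four
 * workstreams and nws_count == 4, a NETISR_POLICY_FLOW packet carrying
 * flowid 13 is assigned nws_array[13 % 4], the workstream on the second
 * registered CPU.  A packet with no usable flow ID, or a protocol using
 * NETISR_POLICY_SOURCE, is instead hashed by receive interface index plus
 * the caller-supplied source identifier, modulo nws_count.
 */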

/*
 * Process packets associated with a workstream and protocol.  For reasons of
 * fairness, we process up to one complete netisr queue at a time, moving the
 * queue to a stack-local queue for processing, but do not loop refreshing
 * from the global queue.  The caller is responsible for deciding whether to
 * loop, and for setting the NWS_RUNNING flag.  The passed workstream will be
 * locked on entry and relocked before return, but will be released while
 * processing.  The number of packets processed is returned.
 */
static u_int
netisr_process_workstream_proto(struct netisr_workstream *nwsp, u_int proto)
{
	struct netisr_work local_npw, *npwp;
	u_int handled;
	struct mbuf *m;

	NETISR_LOCK_ASSERT();
	NWS_LOCK_ASSERT(nwsp);

	KASSERT(nwsp->nws_flags & NWS_RUNNING,
	    ("%s(%u): not running", __func__, proto));
	KASSERT(proto >= 0 && proto < NETISR_MAXPROT,
	    ("%s(%u): invalid proto\n", __func__, proto));

	npwp = &nwsp->nws_work[proto];
	if (npwp->nw_len == 0)
		return (0);

	/*
	 * Move the global work queue to a thread-local work queue.
	 *
	 * Notice that this means the effective maximum length of the queue
	 * is actually twice that of the maximum queue length specified in
	 * the protocol registration call.
	 */
	handled = npwp->nw_len;
	local_npw = *npwp;
	npwp->nw_head = NULL;
	npwp->nw_tail = NULL;
	npwp->nw_len = 0;
	nwsp->nws_pendingbits &= ~(1 << proto);
	NWS_UNLOCK(nwsp);
	while ((m = local_npw.nw_head) != NULL) {
		local_npw.nw_head = m->m_nextpkt;
		m->m_nextpkt = NULL;
		if (local_npw.nw_head == NULL)
			local_npw.nw_tail = NULL;
		local_npw.nw_len--;
		VNET_ASSERT(m->m_pkthdr.rcvif != NULL,
		    ("%s:%d rcvif == NULL: m=%p", __func__, __LINE__, m));
		CURVNET_SET(m->m_pkthdr.rcvif->if_vnet);
		netisr_proto[proto].np_handler(m);
		CURVNET_RESTORE();
	}
	KASSERT(local_npw.nw_len == 0,
	    ("%s(%u): len %u", __func__, proto, local_npw.nw_len));
	if (netisr_proto[proto].np_drainedcpu)
		netisr_proto[proto].np_drainedcpu(nwsp->nws_cpu);
	NWS_LOCK(nwsp);
	npwp->nw_handled += handled;
	return (handled);
}

/*
 * SWI handler for netisr -- processes packets in a set of workstreams that
 * it owns, woken up by calls to NWS_SIGNAL().  If this workstream is already
 * being direct dispatched, go back to sleep and wait for the dispatching
 * thread to wake us up again.
 */
static void
swi_net(void *arg)
{
#ifdef NETISR_LOCKING
	struct rm_priotracker tracker;
#endif
	struct netisr_workstream *nwsp;
	u_int bits, prot;

	nwsp = arg;

#ifdef DEVICE_POLLING
	KASSERT(nws_count == 1,
	    ("%s: device_polling but nws_count != 1", __func__));
	netisr_poll();
#endif
#ifdef NETISR_LOCKING
	NETISR_RLOCK(&tracker);
#endif
	NWS_LOCK(nwsp);
	KASSERT(!(nwsp->nws_flags & NWS_RUNNING), ("swi_net: running"));
	if (nwsp->nws_flags & NWS_DISPATCHING)
		goto out;
	nwsp->nws_flags |= NWS_RUNNING;
	nwsp->nws_flags &= ~NWS_SCHEDULED;
	while ((bits = nwsp->nws_pendingbits) != 0) {
		while ((prot = ffs(bits)) != 0) {
			prot--;
			bits &= ~(1 << prot);
			(void)netisr_process_workstream_proto(nwsp, prot);
		}
	}
	nwsp->nws_flags &= ~NWS_RUNNING;
out:
	NWS_UNLOCK(nwsp);
#ifdef NETISR_LOCKING
	NETISR_RUNLOCK(&tracker);
#endif
#ifdef DEVICE_POLLING
	netisr_pollmore();
#endif
}

static int
netisr_queue_workstream(struct netisr_workstream *nwsp, u_int proto,
    struct netisr_work *npwp, struct mbuf *m, int *dosignalp)
{

	NWS_LOCK_ASSERT(nwsp);

	*dosignalp = 0;
	if (npwp->nw_len < npwp->nw_qlimit) {
		m->m_nextpkt = NULL;
		if (npwp->nw_head == NULL) {
			npwp->nw_head = m;
			npwp->nw_tail = m;
		} else {
			npwp->nw_tail->m_nextpkt = m;
			npwp->nw_tail = m;
		}
		npwp->nw_len++;
		if (npwp->nw_len > npwp->nw_watermark)
			npwp->nw_watermark = npwp->nw_len;

		/*
		 * We must set the bit regardless of NWS_RUNNING, so that
		 * swi_net() keeps calling netisr_process_workstream_proto().
		 */
		nwsp->nws_pendingbits |= (1 << proto);
		if (!(nwsp->nws_flags &
		    (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED))) {
			nwsp->nws_flags |= NWS_SCHEDULED;
			*dosignalp = 1;	/* Defer until unlocked. */
		}
		npwp->nw_queued++;
		return (0);
	} else {
		m_freem(m);
		npwp->nw_qdrops++;
		return (ENOBUFS);
	}
}

static int
netisr_queue_internal(u_int proto, struct mbuf *m, u_int cpuid)
{
	struct netisr_workstream *nwsp;
	struct netisr_work *npwp;
	int dosignal, error;

#ifdef NETISR_LOCKING
	NETISR_LOCK_ASSERT();
#endif
	KASSERT(cpuid <= mp_maxid, ("%s: cpuid too big (%u, %u)", __func__,
	    cpuid, mp_maxid));
	KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));

	dosignal = 0;
	error = 0;
	nwsp = DPCPU_ID_PTR(cpuid, nws);
	npwp = &nwsp->nws_work[proto];
	NWS_LOCK(nwsp);
	error = netisr_queue_workstream(nwsp, proto, npwp, m, &dosignal);
	NWS_UNLOCK(nwsp);
	if (dosignal)
		NWS_SIGNAL(nwsp);
	return (error);
}

int
netisr_queue_src(u_int proto, uintptr_t source, struct mbuf *m)
{
#ifdef NETISR_LOCKING
	struct rm_priotracker tracker;
#endif
	u_int cpuid;
	int error;

	KASSERT(proto < NETISR_MAXPROT,
	    ("%s: invalid proto %u", __func__, proto));

#ifdef NETISR_LOCKING
	NETISR_RLOCK(&tracker);
#endif
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s: invalid proto %u", __func__, proto));

#ifdef VIMAGE
	if (V_netisr_enable[proto] == 0) {
		m_freem(m);
		return (ENOPROTOOPT);
	}
#endif

	m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_DEFERRED,
	    source, m, &cpuid);
	if (m != NULL) {
		KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__,
		    cpuid));
		error = netisr_queue_internal(proto, m, cpuid);
	} else
		error = ENOBUFS;
#ifdef NETISR_LOCKING
	NETISR_RUNLOCK(&tracker);
#endif
	return (error);
}

int
netisr_queue(u_int proto, struct mbuf *m)
{

	return (netisr_queue_src(proto, 0, m));
}
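
/*
 * Summary of the deferred path above (illustrative, NETISR_FOO is a
 * hypothetical protocol number):
 *
 *	netisr_queue(NETISR_FOO, m)
 *	    -> netisr_select_cpuid()		pick a workstream CPU
 *	    -> netisr_queue_internal()		enqueue, NWS_SIGNAL if idle
 *	    -> swi_net()			later, on the chosen CPU
 *	        -> np_handler(m)
 *
 * The enqueue marks the workstream NWS_SCHEDULED only when it is not
 * already running, dispatching, or scheduled, so at most one wakeup is
 * outstanding per workstream.
 */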

/*
 * Dispatch a packet for netisr processing; direct dispatch is permitted by
 * calling context.
 */
int
netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m)
{
#ifdef NETISR_LOCKING
	struct rm_priotracker tracker;
#endif
	struct netisr_workstream *nwsp;
	struct netisr_proto *npp;
	struct netisr_work *npwp;
	int dosignal, error;
	u_int cpuid, dispatch_policy;

	KASSERT(proto < NETISR_MAXPROT,
	    ("%s: invalid proto %u", __func__, proto));
#ifdef NETISR_LOCKING
	NETISR_RLOCK(&tracker);
#endif
	npp = &netisr_proto[proto];
	KASSERT(npp->np_handler != NULL, ("%s: invalid proto %u", __func__,
	    proto));

#ifdef VIMAGE
	if (V_netisr_enable[proto] == 0) {
		m_freem(m);
		return (ENOPROTOOPT);
	}
#endif

	dispatch_policy = netisr_get_dispatch(npp);
	if (dispatch_policy == NETISR_DISPATCH_DEFERRED)
		return (netisr_queue_src(proto, source, m));

	/*
	 * If direct dispatch is forced, then unconditionally dispatch
	 * without a formal CPU selection.  Borrow the current CPU's stats,
	 * even if there's no worker on it.  In this case we don't update
	 * nws_flags because all netisr processing will be source ordered due
	 * to always being forced to directly dispatch.
	 */
	if (dispatch_policy == NETISR_DISPATCH_DIRECT) {
		nwsp = DPCPU_PTR(nws);
		npwp = &nwsp->nws_work[proto];
		npwp->nw_dispatched++;
		npwp->nw_handled++;
		netisr_proto[proto].np_handler(m);
		error = 0;
		goto out_unlock;
	}

	KASSERT(dispatch_policy == NETISR_DISPATCH_HYBRID,
	    ("%s: unknown dispatch policy (%u)", __func__, dispatch_policy));

	/*
	 * Otherwise, we execute in a hybrid mode where we will try to direct
	 * dispatch if we're on the right CPU and the netisr worker isn't
	 * already running.
	 */
	sched_pin();
	m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_HYBRID,
	    source, m, &cpuid);
	if (m == NULL) {
		error = ENOBUFS;
		goto out_unpin;
	}
	KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));
	if (cpuid != curcpu)
		goto queue_fallback;
	nwsp = DPCPU_PTR(nws);
	npwp = &nwsp->nws_work[proto];

	/*-
	 * We are willing to direct dispatch only if three conditions hold:
	 *
	 * (1) The netisr worker isn't already running,
	 * (2) Another thread isn't already directly dispatching, and
	 * (3) The netisr hasn't already been woken up.
	 */
	NWS_LOCK(nwsp);
	if (nwsp->nws_flags & (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED)) {
		error = netisr_queue_workstream(nwsp, proto, npwp, m,
		    &dosignal);
		NWS_UNLOCK(nwsp);
		if (dosignal)
			NWS_SIGNAL(nwsp);
		goto out_unpin;
	}

	/*
	 * The current thread is now effectively the netisr worker, so set
	 * the dispatching flag to prevent concurrent processing of the
	 * stream from another thread (even the netisr worker), which could
	 * otherwise lead to effective misordering of the stream.
	 */
	nwsp->nws_flags |= NWS_DISPATCHING;
	NWS_UNLOCK(nwsp);
	netisr_proto[proto].np_handler(m);
	NWS_LOCK(nwsp);
	nwsp->nws_flags &= ~NWS_DISPATCHING;
	npwp->nw_handled++;
	npwp->nw_hybrid_dispatched++;

	/*
	 * If other work was enqueued by another thread while we were direct
	 * dispatching, we need to signal the netisr worker to do that work.
	 * In the future, we might want to do some of that work in the
	 * current thread, rather than trigger further context switches.  If
	 * so, we'll want to establish a reasonable bound on the work done in
	 * the "borrowed" context.
	 */
	if (nwsp->nws_pendingbits != 0) {
		nwsp->nws_flags |= NWS_SCHEDULED;
		dosignal = 1;
	} else
		dosignal = 0;
	NWS_UNLOCK(nwsp);
	if (dosignal)
		NWS_SIGNAL(nwsp);
	error = 0;
	goto out_unpin;

queue_fallback:
	error = netisr_queue_internal(proto, m, cpuid);
out_unpin:
	sched_unpin();
out_unlock:
#ifdef NETISR_LOCKING
	NETISR_RUNLOCK(&tracker);
#endif
	return (error);
}

int
netisr_dispatch(u_int proto, struct mbuf *m)
{

	return (netisr_dispatch_src(proto, 0, m));
}

#ifdef DEVICE_POLLING
/*
 * Kernel polling borrows a netisr thread to run interface polling in; this
 * function allows kernel polling to request that the netisr thread be
 * scheduled even if no packets are pending for protocols.
 */
void
netisr_sched_poll(void)
{
	struct netisr_workstream *nwsp;

	nwsp = DPCPU_ID_PTR(nws_array[0], nws);
	NWS_SIGNAL(nwsp);
}
#endif

static void
netisr_start_swi(u_int cpuid, struct pcpu *pc)
{
	char swiname[12];
	struct netisr_workstream *nwsp;
	int error;

	KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));

	nwsp = DPCPU_ID_PTR(cpuid, nws);
	mtx_init(&nwsp->nws_mtx, "netisr_mtx", NULL, MTX_DEF);
	nwsp->nws_cpu = cpuid;
	snprintf(swiname, sizeof(swiname), "netisr %u", cpuid);
	error = swi_add(&nwsp->nws_intr_event, swiname, swi_net, nwsp,
	    SWI_NET, INTR_MPSAFE, &nwsp->nws_swi_cookie);
	if (error)
		panic("%s: swi_add %d", __func__, error);
	pc->pc_netisr = nwsp->nws_intr_event;
	if (netisr_bindthreads) {
		error = intr_event_bind(nwsp->nws_intr_event, cpuid);
		if (error != 0)
			printf("%s: cpu %u: intr_event_bind: %d\n", __func__,
			    cpuid, error);
	}
	NETISR_WLOCK();
	nws_array[nws_count] = nwsp->nws_cpu;
	nws_count++;
	NETISR_WUNLOCK();
}

/*
 * Initialize the netisr subsystem.  We rely on BSS and static initialization
 * of most fields in global data structures.
 *
 * Start a worker thread for the boot CPU so that we can support network
 * traffic immediately in case the network stack is used before additional
 * CPUs are started (for example, diskless boot).
 */
static void
netisr_init(void *arg)
{
#ifdef EARLY_AP_STARTUP
	struct pcpu *pc;
#endif

	KASSERT(curcpu == 0, ("%s: not on CPU 0", __func__));

	NETISR_LOCK_INIT();
	if (netisr_maxthreads == 0 || netisr_maxthreads < -1)
		netisr_maxthreads = 1;		/* default behavior */
	else if (netisr_maxthreads == -1)
		netisr_maxthreads = mp_ncpus;	/* use max cpus */
	if (netisr_maxthreads > mp_ncpus) {
		printf("netisr_init: forcing maxthreads from %d to %d\n",
		    netisr_maxthreads, mp_ncpus);
		netisr_maxthreads = mp_ncpus;
	}
	if (netisr_defaultqlimit > netisr_maxqlimit) {
		printf("netisr_init: forcing defaultqlimit from %d to %d\n",
		    netisr_defaultqlimit, netisr_maxqlimit);
		netisr_defaultqlimit = netisr_maxqlimit;
	}
#ifdef DEVICE_POLLING
	/*
	 * The device polling code is not yet aware of how to deal with
	 * multiple netisr threads, so for the time being compiling in device
	 * polling disables parallel netisr workers.
	 */
	if (netisr_maxthreads != 1 || netisr_bindthreads != 0) {
		printf("netisr_init: forcing maxthreads to 1 and "
		    "bindthreads to 0 for device polling\n");
		netisr_maxthreads = 1;
		netisr_bindthreads = 0;
	}
#endif

#ifdef EARLY_AP_STARTUP
	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
		if (nws_count >= netisr_maxthreads)
			break;
		netisr_start_swi(pc->pc_cpuid, pc);
	}
#else
	netisr_start_swi(curcpu, pcpu_find(curcpu));
#endif
}
SYSINIT(netisr_init, SI_SUB_SOFTINTR, SI_ORDER_FIRST, netisr_init, NULL);

#ifndef EARLY_AP_STARTUP
/*
 * Start worker threads for additional CPUs.  No attempt to gracefully handle
 * work reassignment, we don't yet support dynamic reconfiguration.
 */
static void
netisr_start(void *arg)
{
	struct pcpu *pc;

	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
		if (nws_count >= netisr_maxthreads)
			break;
		/* Worker will already be present for boot CPU. */
		if (pc->pc_netisr != NULL)
			continue;
		netisr_start_swi(pc->pc_cpuid, pc);
	}
}
SYSINIT(netisr_start, SI_SUB_SMP, SI_ORDER_MIDDLE, netisr_start, NULL);
#endif

/*
 * Sysctl monitoring for netisr: query a list of registered protocols.
 */
static int
sysctl_netisr_proto(SYSCTL_HANDLER_ARGS)
{
	struct rm_priotracker tracker;
	struct sysctl_netisr_proto *snpp, *snp_array;
	struct netisr_proto *npp;
	u_int counter, proto;
	int error;

	if (req->newptr != NULL)
		return (EINVAL);
	snp_array = malloc(sizeof(*snp_array) * NETISR_MAXPROT, M_TEMP,
	    M_ZERO | M_WAITOK);
	counter = 0;
	NETISR_RLOCK(&tracker);
	for (proto = 0; proto < NETISR_MAXPROT; proto++) {
		npp = &netisr_proto[proto];
		if (npp->np_name == NULL)
			continue;
		snpp = &snp_array[counter];
		snpp->snp_version = sizeof(*snpp);
		strlcpy(snpp->snp_name, npp->np_name, NETISR_NAMEMAXLEN);
		snpp->snp_proto = proto;
		snpp->snp_qlimit = npp->np_qlimit;
		snpp->snp_policy = npp->np_policy;
		snpp->snp_dispatch = npp->np_dispatch;
		if (npp->np_m2flow != NULL)
			snpp->snp_flags |= NETISR_SNP_FLAGS_M2FLOW;
		if (npp->np_m2cpuid != NULL)
			snpp->snp_flags |= NETISR_SNP_FLAGS_M2CPUID;
		if (npp->np_drainedcpu != NULL)
			snpp->snp_flags |= NETISR_SNP_FLAGS_DRAINEDCPU;
		counter++;
	}
	NETISR_RUNLOCK(&tracker);
	KASSERT(counter <= NETISR_MAXPROT,
	    ("sysctl_netisr_proto: counter too big (%d)", counter));
	error = SYSCTL_OUT(req, snp_array, sizeof(*snp_array) * counter);
	free(snp_array, M_TEMP);
	return (error);
}

SYSCTL_PROC(_net_isr, OID_AUTO, proto,
    CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_proto,
    "S,sysctl_netisr_proto",
    "Return list of protocols registered with netisr");
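
/*
 * The monitoring nodes defined here and below (net.isr.proto,
 * net.isr.workstream and net.isr.work) export binary structures rather
 * than text, and are intended for tools such as netstat(1), whose -Q
 * option renders netisr configuration and statistics, rather than for
 * direct sysctl(8) output.
 */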

/*
 * Sysctl monitoring for netisr: query a list of workstreams.
 */
static int
sysctl_netisr_workstream(SYSCTL_HANDLER_ARGS)
{
	struct rm_priotracker tracker;
	struct sysctl_netisr_workstream *snwsp, *snws_array;
	struct netisr_workstream *nwsp;
	u_int counter, cpuid;
	int error;

	if (req->newptr != NULL)
		return (EINVAL);
	snws_array = malloc(sizeof(*snws_array) * MAXCPU, M_TEMP,
	    M_ZERO | M_WAITOK);
	counter = 0;
	NETISR_RLOCK(&tracker);
	CPU_FOREACH(cpuid) {
		nwsp = DPCPU_ID_PTR(cpuid, nws);
		if (nwsp->nws_intr_event == NULL)
			continue;
		NWS_LOCK(nwsp);
		snwsp = &snws_array[counter];
		snwsp->snws_version = sizeof(*snwsp);

		/*
		 * For now, we equate workstream IDs and CPU IDs in the
		 * kernel, but expose them independently to userspace in case
		 * that assumption changes in the future.
		 */
		snwsp->snws_wsid = cpuid;
		snwsp->snws_cpu = cpuid;
		if (nwsp->nws_intr_event != NULL)
			snwsp->snws_flags |= NETISR_SNWS_FLAGS_INTR;
		NWS_UNLOCK(nwsp);
		counter++;
	}
	NETISR_RUNLOCK(&tracker);
	KASSERT(counter <= MAXCPU,
	    ("sysctl_netisr_workstream: counter too big (%d)", counter));
	error = SYSCTL_OUT(req, snws_array, sizeof(*snws_array) * counter);
	free(snws_array, M_TEMP);
	return (error);
}

SYSCTL_PROC(_net_isr, OID_AUTO, workstream,
    CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_workstream,
    "S,sysctl_netisr_workstream",
    "Return list of workstreams implemented by netisr");

/*
 * Sysctl monitoring for netisr: query per-protocol data across all
 * workstreams.
 */
static int
sysctl_netisr_work(SYSCTL_HANDLER_ARGS)
{
	struct rm_priotracker tracker;
	struct sysctl_netisr_work *snwp, *snw_array;
	struct netisr_workstream *nwsp;
	struct netisr_proto *npp;
	struct netisr_work *nwp;
	u_int counter, cpuid, proto;
	int error;

	if (req->newptr != NULL)
		return (EINVAL);
	snw_array = malloc(sizeof(*snw_array) * MAXCPU * NETISR_MAXPROT,
	    M_TEMP, M_ZERO | M_WAITOK);
	counter = 0;
	NETISR_RLOCK(&tracker);
	CPU_FOREACH(cpuid) {
		nwsp = DPCPU_ID_PTR(cpuid, nws);
		if (nwsp->nws_intr_event == NULL)
			continue;
		NWS_LOCK(nwsp);
		for (proto = 0; proto < NETISR_MAXPROT; proto++) {
			npp = &netisr_proto[proto];
			if (npp->np_name == NULL)
				continue;
			nwp = &nwsp->nws_work[proto];
			snwp = &snw_array[counter];
			snwp->snw_version = sizeof(*snwp);
			snwp->snw_wsid = cpuid;		/* See comment above. */
			snwp->snw_proto = proto;
			snwp->snw_len = nwp->nw_len;
			snwp->snw_watermark = nwp->nw_watermark;
			snwp->snw_dispatched = nwp->nw_dispatched;
			snwp->snw_hybrid_dispatched =
			    nwp->nw_hybrid_dispatched;
			snwp->snw_qdrops = nwp->nw_qdrops;
			snwp->snw_queued = nwp->nw_queued;
			snwp->snw_handled = nwp->nw_handled;
			counter++;
		}
		NWS_UNLOCK(nwsp);
	}
	KASSERT(counter <= MAXCPU * NETISR_MAXPROT,
	    ("sysctl_netisr_work: counter too big (%d)", counter));
	NETISR_RUNLOCK(&tracker);
	error = SYSCTL_OUT(req, snw_array, sizeof(*snw_array) * counter);
	free(snw_array, M_TEMP);
	return (error);
}

SYSCTL_PROC(_net_isr, OID_AUTO, work,
    CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_work,
    "S,sysctl_netisr_work",
    "Return list of per-workstream, per-protocol work in netisr");

#ifdef DDB
DB_SHOW_COMMAND(netisr, db_show_netisr)
{
	struct netisr_workstream *nwsp;
	struct netisr_work *nwp;
	int first, proto;
	u_int cpuid;

	db_printf("%3s %6s %5s %5s %5s %8s %8s %8s %8s\n", "CPU", "Proto",
	    "Len", "WMark", "Max", "Disp", "HDisp", "Drop", "Queue");
	CPU_FOREACH(cpuid) {
		nwsp = DPCPU_ID_PTR(cpuid, nws);
		if (nwsp->nws_intr_event == NULL)
			continue;
		first = 1;
		for (proto = 0; proto < NETISR_MAXPROT; proto++) {
			if (netisr_proto[proto].np_handler == NULL)
				continue;
			nwp = &nwsp->nws_work[proto];
			if (first) {
				db_printf("%3d ", cpuid);
				first = 0;
			} else
				db_printf("%3s ", "");
			db_printf(
			    "%6s %5d %5d %5d %8ju %8ju %8ju %8ju\n",
			    netisr_proto[proto].np_name, nwp->nw_len,
			    nwp->nw_watermark, nwp->nw_qlimit,
			    nwp->nw_dispatched, nwp->nw_hybrid_dispatched,
			    nwp->nw_qdrops, nwp->nw_queued);
		}
	}
}
#endif