1 /*- 2 * Copyright (c) 2007-2009 Robert N. M. Watson 3 * Copyright (c) 2010-2011 Juniper Networks, Inc. 4 * All rights reserved. 5 * 6 * This software was developed by Robert N. M. Watson under contract 7 * to Juniper Networks, Inc. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 /* 35 * netisr is a packet dispatch service, allowing synchronous (directly 36 * dispatched) and asynchronous (deferred dispatch) processing of packets by 37 * registered protocol handlers. 
Callers pass a protocol identifier and
 * packet to netisr, along with a direct dispatch hint, and work will either
 * be immediately processed by the registered handler, or passed to a
 * software interrupt (SWI) thread for deferred dispatch. Callers will
 * generally select one or the other based on:
 *
 * - Whether directly dispatching a netisr handler leads to code reentrance or
 *   lock recursion, such as entering the socket code from the socket code.
 * - Whether directly dispatching a netisr handler leads to recursive
 *   processing, such as when decapsulating several wrapped layers of tunnel
 *   information (IPSEC within IPSEC within ...).
 *
 * Maintaining ordering for protocol streams is a critical design concern.
 * Enforcing ordering limits the opportunity for concurrency, but maintains
 * the strong ordering requirements found in some protocols, such as TCP. Of
 * related concern is CPU affinity--it is desirable to process all data
 * associated with a particular stream on the same CPU over time in order to
 * avoid acquiring locks associated with the connection on different CPUs,
 * keep connection data in one cache, and to generally encourage associated
 * user threads to live on the same CPU as the stream. It's also desirable
 * to avoid lock migration and contention where locks are associated with
 * more than one flow.
 *
 * netisr supports several policy variations, represented by the
 * NETISR_POLICY_* constants, allowing protocols to play various roles in
 * identifying flows, assigning work to CPUs, etc. These are described in
 * netisr.h.
*/

#include "opt_ddb.h"
#include "opt_device_polling.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/interrupt.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/rmlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#define	_WANT_NETISR_INTERNAL	/* Enable definitions from netisr_internal.h */
#include <net/if.h>
#include <net/if_var.h>
#include <net/netisr.h>
#include <net/netisr_internal.h>
#include <net/vnet.h>

/*-
 * Synchronize use and modification of the registered netisr data structures;
 * acquire a write lock while modifying the set of registered protocols to
 * prevent partially registered or unregistered protocols from being run.
 *
 * The following data structures and fields are protected by this lock:
 *
 * - The netisr_proto array, including all fields of struct netisr_proto.
 * - The nws array, including all fields of struct netisr_workstream.
 * - The nws_array array.
 *
 * Note: the NETISR_LOCKING define controls whether read locks are acquired
 * in packet processing paths requiring netisr registration stability. This
 * is disabled by default as it can lead to measurable performance
 * degradation even with rmlocks (3%-6% for loopback ping-pong traffic), and
 * because netisr registration and unregistration is extremely rare at
 * runtime. If it becomes more common, this decision should be revisited.
 *
 * XXXRW: rmlocks don't support assertions.
*/
static struct rmlock	netisr_rmlock;
/* Initialize the registration lock; WITNESS checking is disabled for it. */
#define	NETISR_LOCK_INIT()	rm_init_flags(&netisr_rmlock, "netisr", \
				    RM_NOWITNESS)
/* Expands to nothing: no assertion is performed. */
#define	NETISR_LOCK_ASSERT()
/* Read-side acquire/release; the caller supplies a struct rm_priotracker. */
#define	NETISR_RLOCK(tracker)	rm_rlock(&netisr_rmlock, (tracker))
#define	NETISR_RUNLOCK(tracker)	rm_runlock(&netisr_rmlock, (tracker))
/* Write-side acquire/release, used when changing registration state. */
#define	NETISR_WLOCK()		rm_wlock(&netisr_rmlock)
#define	NETISR_WUNLOCK()	rm_wunlock(&netisr_rmlock)
/*
 * NETISR_LOCKING is left undefined here, so the read-lock calls wrapped in
 * #ifdef NETISR_LOCKING in the packet paths are compiled out.
 */
/* #define	NETISR_LOCKING */

/* Root of the net.isr sysctl tree; the tunables below hang off it. */
static SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr");

/*-
 * Three global direct dispatch policies are supported:
 *
 * NETISR_DISPATCH_DEFERRED: All work is deferred for a netisr, regardless of
 * context (may be overridden by protocols).
 *
 * NETISR_DISPATCH_HYBRID: If the executing context allows direct dispatch,
 * and we're running on the CPU the work would be performed on, then direct
 * dispatch it if it wouldn't violate ordering constraints on the workstream.
 *
 * NETISR_DISPATCH_DIRECT: If the executing context allows direct dispatch,
 * always direct dispatch. (The default.)
 *
 * Notice that changing the global policy could lead to short periods of
 * misordered processing, but this is considered acceptable as compared to
 * the complexity of enforcing ordering during policy changes. Protocols can
 * override the global policy (when they're not doing that, they select
 * NETISR_DISPATCH_DEFAULT).
 */
#define	NETISR_DISPATCH_POLICY_DEFAULT	NETISR_DISPATCH_DIRECT
#define	NETISR_DISPATCH_POLICY_MAXSTR	20 /* Used for temporary buffers.
*/
/*
 * Current global dispatch policy. It is written by
 * sysctl_netisr_dispatch_policy() and read by netisr_get_dispatch() when a
 * protocol has not selected a policy of its own.
 */
static u_int	netisr_dispatch_policy = NETISR_DISPATCH_POLICY_DEFAULT;
static int	sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS);
/* net.isr.dispatch: string-valued view of netisr_dispatch_policy. */
SYSCTL_PROC(_net_isr, OID_AUTO, dispatch, CTLTYPE_STRING | CTLFLAG_RWTUN,
    0, 0, sysctl_netisr_dispatch_policy, "A",
    "netisr dispatch policy");

/*
 * Allow the administrator to limit the number of threads (CPUs) to use for
 * netisr. We don't check netisr_maxthreads before creating the thread for
 * CPU 0. This must be set at boot. We will create at most one thread per CPU.
 * By default we initialize this to 1 which would assign just 1 cpu (cpu0) and
 * therefore only 1 workstream. If set to -1, netisr would use all cpus
 * (mp_ncpus) and therefore would have those many workstreams. One workstream
 * per thread (CPU).
 */
static int	netisr_maxthreads = 1;		/* Max number of threads. */
SYSCTL_INT(_net_isr, OID_AUTO, maxthreads, CTLFLAG_RDTUN,
    &netisr_maxthreads, 0,
    "Use at most this many CPUs for netisr processing");

/* When non-zero, netisr_start_swi() binds each SWI to its CPU. */
static int	netisr_bindthreads = 0;		/* Bind threads to CPUs. */
SYSCTL_INT(_net_isr, OID_AUTO, bindthreads, CTLFLAG_RDTUN,
    &netisr_bindthreads, 0, "Bind netisr threads to CPUs.");

/*
 * Limit per-workstream mbuf queue limits to at most net.isr.maxqlimit,
 * both for initial configuration and later modification using
 * netisr_setqlimit().
 */
#define	NETISR_DEFAULT_MAXQLIMIT	10240
static u_int	netisr_maxqlimit = NETISR_DEFAULT_MAXQLIMIT;
SYSCTL_UINT(_net_isr, OID_AUTO, maxqlimit, CTLFLAG_RDTUN,
    &netisr_maxqlimit, 0,
    "Maximum netisr per-protocol, per-CPU queue depth.");

/*
 * The default per-workstream mbuf queue limit for protocols that don't
 * initialize the nh_qlimit field of their struct netisr_handler. If this is
 * set above netisr_maxqlimit, we truncate it to the maximum during boot.
*/
#define	NETISR_DEFAULT_DEFAULTQLIMIT	256
static u_int	netisr_defaultqlimit = NETISR_DEFAULT_DEFAULTQLIMIT;
SYSCTL_UINT(_net_isr, OID_AUTO, defaultqlimit, CTLFLAG_RDTUN,
    &netisr_defaultqlimit, 0,
    "Default netisr per-protocol, per-CPU queue limit if not set by protocol");

/*
 * Store and export the compile-time constant NETISR_MAXPROT limit on the
 * number of protocols that can register with netisr at a time. This is
 * required for crashdump analysis, as it sizes netisr_proto[].
 */
static u_int	netisr_maxprot = NETISR_MAXPROT;
SYSCTL_UINT(_net_isr, OID_AUTO, maxprot, CTLFLAG_RD,
    &netisr_maxprot, 0,
    "Compile-time limit on the number of protocols supported by netisr.");

/*
 * The netisr_proto array describes all registered protocols, indexed by
 * protocol number. See netisr_internal.h for more details. A slot whose
 * np_handler is NULL is treated as unregistered by the assertions in this
 * file.
 */
static struct netisr_proto	netisr_proto[NETISR_MAXPROT];

/*
 * Per-CPU workstream data. See netisr_internal.h for more details.
 */
DPCPU_DEFINE(struct netisr_workstream, nws);

/*
 * Map contiguous values between 0 and nws_count into CPU IDs appropriate for
 * accessing workstreams. This allows constructions of the form
 * DPCPU_ID_GET(nws_array[arbitraryvalue % nws_count], nws). Entries are
 * appended by netisr_start_swi() as each workstream is started.
 */
static u_int				 nws_array[MAXCPU];

/*
 * Number of registered workstreams. Will be at most the number of running
 * CPUs once fully started. Incremented by netisr_start_swi() while holding
 * the netisr write lock.
 */
static u_int				 nws_count;
SYSCTL_UINT(_net_isr, OID_AUTO, numthreads, CTLFLAG_RD,
    &nws_count, 0, "Number of extant netisr threads.");

/*
 * Synchronization for each workstream: a mutex protects all mutable fields
 * in each stream, including per-protocol state (mbuf queues). The SWI is
 * woken up if asynchronous dispatch is required.
237 */ 238 #define NWS_LOCK(s) mtx_lock(&(s)->nws_mtx) 239 #define NWS_LOCK_ASSERT(s) mtx_assert(&(s)->nws_mtx, MA_OWNED) 240 #define NWS_UNLOCK(s) mtx_unlock(&(s)->nws_mtx) 241 #define NWS_SIGNAL(s) swi_sched((s)->nws_swi_cookie, 0) 242 243 /* 244 * Utility routines for protocols that implement their own mapping of flows 245 * to CPUs. 246 */ 247 u_int 248 netisr_get_cpucount(void) 249 { 250 251 return (nws_count); 252 } 253 254 u_int 255 netisr_get_cpuid(u_int cpunumber) 256 { 257 258 KASSERT(cpunumber < nws_count, ("%s: %u > %u", __func__, cpunumber, 259 nws_count)); 260 261 return (nws_array[cpunumber]); 262 } 263 264 /* 265 * The default implementation of flow -> CPU ID mapping. 266 * 267 * Non-static so that protocols can use it to map their own work to specific 268 * CPUs in a manner consistent to netisr for affinity purposes. 269 */ 270 u_int 271 netisr_default_flow2cpu(u_int flowid) 272 { 273 274 return (nws_array[flowid % nws_count]); 275 } 276 277 /* 278 * Dispatch tunable and sysctl configuration. 
279 */ 280 struct netisr_dispatch_table_entry { 281 u_int ndte_policy; 282 const char *ndte_policy_str; 283 }; 284 static const struct netisr_dispatch_table_entry netisr_dispatch_table[] = { 285 { NETISR_DISPATCH_DEFAULT, "default" }, 286 { NETISR_DISPATCH_DEFERRED, "deferred" }, 287 { NETISR_DISPATCH_HYBRID, "hybrid" }, 288 { NETISR_DISPATCH_DIRECT, "direct" }, 289 }; 290 291 static void 292 netisr_dispatch_policy_to_str(u_int dispatch_policy, char *buffer, 293 u_int buflen) 294 { 295 const struct netisr_dispatch_table_entry *ndtep; 296 const char *str; 297 u_int i; 298 299 str = "unknown"; 300 for (i = 0; i < nitems(netisr_dispatch_table); i++) { 301 ndtep = &netisr_dispatch_table[i]; 302 if (ndtep->ndte_policy == dispatch_policy) { 303 str = ndtep->ndte_policy_str; 304 break; 305 } 306 } 307 snprintf(buffer, buflen, "%s", str); 308 } 309 310 static int 311 netisr_dispatch_policy_from_str(const char *str, u_int *dispatch_policyp) 312 { 313 const struct netisr_dispatch_table_entry *ndtep; 314 u_int i; 315 316 for (i = 0; i < nitems(netisr_dispatch_table); i++) { 317 ndtep = &netisr_dispatch_table[i]; 318 if (strcmp(ndtep->ndte_policy_str, str) == 0) { 319 *dispatch_policyp = ndtep->ndte_policy; 320 return (0); 321 } 322 } 323 return (EINVAL); 324 } 325 326 static int 327 sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS) 328 { 329 char tmp[NETISR_DISPATCH_POLICY_MAXSTR]; 330 u_int dispatch_policy; 331 int error; 332 333 netisr_dispatch_policy_to_str(netisr_dispatch_policy, tmp, 334 sizeof(tmp)); 335 error = sysctl_handle_string(oidp, tmp, sizeof(tmp), req); 336 if (error == 0 && req->newptr != NULL) { 337 error = netisr_dispatch_policy_from_str(tmp, 338 &dispatch_policy); 339 if (error == 0 && dispatch_policy == NETISR_DISPATCH_DEFAULT) 340 error = EINVAL; 341 if (error == 0) 342 netisr_dispatch_policy = dispatch_policy; 343 } 344 return (error); 345 } 346 347 /* 348 * Register a new netisr handler, which requires initializing per-protocol 349 * fields for each 
workstream. All netisr work is briefly suspended while 350 * the protocol is installed. 351 */ 352 void 353 netisr_register(const struct netisr_handler *nhp) 354 { 355 struct netisr_work *npwp; 356 const char *name; 357 u_int i, proto; 358 359 proto = nhp->nh_proto; 360 name = nhp->nh_name; 361 362 /* 363 * Test that the requested registration is valid. 364 */ 365 KASSERT(nhp->nh_name != NULL, 366 ("%s: nh_name NULL for %u", __func__, proto)); 367 KASSERT(nhp->nh_handler != NULL, 368 ("%s: nh_handler NULL for %s", __func__, name)); 369 KASSERT(nhp->nh_policy == NETISR_POLICY_SOURCE || 370 nhp->nh_policy == NETISR_POLICY_FLOW || 371 nhp->nh_policy == NETISR_POLICY_CPU, 372 ("%s: unsupported nh_policy %u for %s", __func__, 373 nhp->nh_policy, name)); 374 KASSERT(nhp->nh_policy == NETISR_POLICY_FLOW || 375 nhp->nh_m2flow == NULL, 376 ("%s: nh_policy != FLOW but m2flow defined for %s", __func__, 377 name)); 378 KASSERT(nhp->nh_policy == NETISR_POLICY_CPU || nhp->nh_m2cpuid == NULL, 379 ("%s: nh_policy != CPU but m2cpuid defined for %s", __func__, 380 name)); 381 KASSERT(nhp->nh_policy != NETISR_POLICY_CPU || nhp->nh_m2cpuid != NULL, 382 ("%s: nh_policy == CPU but m2cpuid not defined for %s", __func__, 383 name)); 384 KASSERT(nhp->nh_dispatch == NETISR_DISPATCH_DEFAULT || 385 nhp->nh_dispatch == NETISR_DISPATCH_DEFERRED || 386 nhp->nh_dispatch == NETISR_DISPATCH_HYBRID || 387 nhp->nh_dispatch == NETISR_DISPATCH_DIRECT, 388 ("%s: invalid nh_dispatch (%u)", __func__, nhp->nh_dispatch)); 389 390 KASSERT(proto < NETISR_MAXPROT, 391 ("%s(%u, %s): protocol too big", __func__, proto, name)); 392 393 /* 394 * Test that no existing registration exists for this protocol. 
395 */ 396 NETISR_WLOCK(); 397 KASSERT(netisr_proto[proto].np_name == NULL, 398 ("%s(%u, %s): name present", __func__, proto, name)); 399 KASSERT(netisr_proto[proto].np_handler == NULL, 400 ("%s(%u, %s): handler present", __func__, proto, name)); 401 402 netisr_proto[proto].np_name = name; 403 netisr_proto[proto].np_handler = nhp->nh_handler; 404 netisr_proto[proto].np_m2flow = nhp->nh_m2flow; 405 netisr_proto[proto].np_m2cpuid = nhp->nh_m2cpuid; 406 netisr_proto[proto].np_drainedcpu = nhp->nh_drainedcpu; 407 if (nhp->nh_qlimit == 0) 408 netisr_proto[proto].np_qlimit = netisr_defaultqlimit; 409 else if (nhp->nh_qlimit > netisr_maxqlimit) { 410 printf("%s: %s requested queue limit %u capped to " 411 "net.isr.maxqlimit %u\n", __func__, name, nhp->nh_qlimit, 412 netisr_maxqlimit); 413 netisr_proto[proto].np_qlimit = netisr_maxqlimit; 414 } else 415 netisr_proto[proto].np_qlimit = nhp->nh_qlimit; 416 netisr_proto[proto].np_policy = nhp->nh_policy; 417 netisr_proto[proto].np_dispatch = nhp->nh_dispatch; 418 CPU_FOREACH(i) { 419 npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto]; 420 bzero(npwp, sizeof(*npwp)); 421 npwp->nw_qlimit = netisr_proto[proto].np_qlimit; 422 } 423 NETISR_WUNLOCK(); 424 } 425 426 /* 427 * Clear drop counters across all workstreams for a protocol. 
428 */ 429 void 430 netisr_clearqdrops(const struct netisr_handler *nhp) 431 { 432 struct netisr_work *npwp; 433 #ifdef INVARIANTS 434 const char *name; 435 #endif 436 u_int i, proto; 437 438 proto = nhp->nh_proto; 439 #ifdef INVARIANTS 440 name = nhp->nh_name; 441 #endif 442 KASSERT(proto < NETISR_MAXPROT, 443 ("%s(%u): protocol too big for %s", __func__, proto, name)); 444 445 NETISR_WLOCK(); 446 KASSERT(netisr_proto[proto].np_handler != NULL, 447 ("%s(%u): protocol not registered for %s", __func__, proto, 448 name)); 449 450 CPU_FOREACH(i) { 451 npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto]; 452 npwp->nw_qdrops = 0; 453 } 454 NETISR_WUNLOCK(); 455 } 456 457 /* 458 * Query current drop counters across all workstreams for a protocol. 459 */ 460 void 461 netisr_getqdrops(const struct netisr_handler *nhp, u_int64_t *qdropp) 462 { 463 struct netisr_work *npwp; 464 struct rm_priotracker tracker; 465 #ifdef INVARIANTS 466 const char *name; 467 #endif 468 u_int i, proto; 469 470 *qdropp = 0; 471 proto = nhp->nh_proto; 472 #ifdef INVARIANTS 473 name = nhp->nh_name; 474 #endif 475 KASSERT(proto < NETISR_MAXPROT, 476 ("%s(%u): protocol too big for %s", __func__, proto, name)); 477 478 NETISR_RLOCK(&tracker); 479 KASSERT(netisr_proto[proto].np_handler != NULL, 480 ("%s(%u): protocol not registered for %s", __func__, proto, 481 name)); 482 483 CPU_FOREACH(i) { 484 npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto]; 485 *qdropp += npwp->nw_qdrops; 486 } 487 NETISR_RUNLOCK(&tracker); 488 } 489 490 /* 491 * Query current per-workstream queue limit for a protocol. 
492 */ 493 void 494 netisr_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp) 495 { 496 struct rm_priotracker tracker; 497 #ifdef INVARIANTS 498 const char *name; 499 #endif 500 u_int proto; 501 502 proto = nhp->nh_proto; 503 #ifdef INVARIANTS 504 name = nhp->nh_name; 505 #endif 506 KASSERT(proto < NETISR_MAXPROT, 507 ("%s(%u): protocol too big for %s", __func__, proto, name)); 508 509 NETISR_RLOCK(&tracker); 510 KASSERT(netisr_proto[proto].np_handler != NULL, 511 ("%s(%u): protocol not registered for %s", __func__, proto, 512 name)); 513 *qlimitp = netisr_proto[proto].np_qlimit; 514 NETISR_RUNLOCK(&tracker); 515 } 516 517 /* 518 * Update the queue limit across per-workstream queues for a protocol. We 519 * simply change the limits, and don't drain overflowed packets as they will 520 * (hopefully) take care of themselves shortly. 521 */ 522 int 523 netisr_setqlimit(const struct netisr_handler *nhp, u_int qlimit) 524 { 525 struct netisr_work *npwp; 526 #ifdef INVARIANTS 527 const char *name; 528 #endif 529 u_int i, proto; 530 531 if (qlimit > netisr_maxqlimit) 532 return (EINVAL); 533 534 proto = nhp->nh_proto; 535 #ifdef INVARIANTS 536 name = nhp->nh_name; 537 #endif 538 KASSERT(proto < NETISR_MAXPROT, 539 ("%s(%u): protocol too big for %s", __func__, proto, name)); 540 541 NETISR_WLOCK(); 542 KASSERT(netisr_proto[proto].np_handler != NULL, 543 ("%s(%u): protocol not registered for %s", __func__, proto, 544 name)); 545 546 netisr_proto[proto].np_qlimit = qlimit; 547 CPU_FOREACH(i) { 548 npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto]; 549 npwp->nw_qlimit = qlimit; 550 } 551 NETISR_WUNLOCK(); 552 return (0); 553 } 554 555 /* 556 * Drain all packets currently held in a particular protocol work queue. 557 */ 558 static void 559 netisr_drain_proto(struct netisr_work *npwp) 560 { 561 struct mbuf *m; 562 563 /* 564 * We would assert the lock on the workstream but it's not passed in. 
565 */ 566 while ((m = npwp->nw_head) != NULL) { 567 npwp->nw_head = m->m_nextpkt; 568 m->m_nextpkt = NULL; 569 if (npwp->nw_head == NULL) 570 npwp->nw_tail = NULL; 571 npwp->nw_len--; 572 m_freem(m); 573 } 574 KASSERT(npwp->nw_tail == NULL, ("%s: tail", __func__)); 575 KASSERT(npwp->nw_len == 0, ("%s: len", __func__)); 576 } 577 578 /* 579 * Remove the registration of a network protocol, which requires clearing 580 * per-protocol fields across all workstreams, including freeing all mbufs in 581 * the queues at time of unregister. All work in netisr is briefly suspended 582 * while this takes place. 583 */ 584 void 585 netisr_unregister(const struct netisr_handler *nhp) 586 { 587 struct netisr_work *npwp; 588 #ifdef INVARIANTS 589 const char *name; 590 #endif 591 u_int i, proto; 592 593 proto = nhp->nh_proto; 594 #ifdef INVARIANTS 595 name = nhp->nh_name; 596 #endif 597 KASSERT(proto < NETISR_MAXPROT, 598 ("%s(%u): protocol too big for %s", __func__, proto, name)); 599 600 NETISR_WLOCK(); 601 KASSERT(netisr_proto[proto].np_handler != NULL, 602 ("%s(%u): protocol not registered for %s", __func__, proto, 603 name)); 604 605 netisr_proto[proto].np_name = NULL; 606 netisr_proto[proto].np_handler = NULL; 607 netisr_proto[proto].np_m2flow = NULL; 608 netisr_proto[proto].np_m2cpuid = NULL; 609 netisr_proto[proto].np_qlimit = 0; 610 netisr_proto[proto].np_policy = 0; 611 CPU_FOREACH(i) { 612 npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto]; 613 netisr_drain_proto(npwp); 614 bzero(npwp, sizeof(*npwp)); 615 } 616 NETISR_WUNLOCK(); 617 } 618 619 /* 620 * Compose the global and per-protocol policies on dispatch, and return the 621 * dispatch policy to use. 622 */ 623 static u_int 624 netisr_get_dispatch(struct netisr_proto *npp) 625 { 626 627 /* 628 * Protocol-specific configuration overrides the global default. 
629 */ 630 if (npp->np_dispatch != NETISR_DISPATCH_DEFAULT) 631 return (npp->np_dispatch); 632 return (netisr_dispatch_policy); 633 } 634 635 /* 636 * Look up the workstream given a packet and source identifier. Do this by 637 * checking the protocol's policy, and optionally call out to the protocol 638 * for assistance if required. 639 */ 640 static struct mbuf * 641 netisr_select_cpuid(struct netisr_proto *npp, u_int dispatch_policy, 642 uintptr_t source, struct mbuf *m, u_int *cpuidp) 643 { 644 struct ifnet *ifp; 645 u_int policy; 646 647 NETISR_LOCK_ASSERT(); 648 649 /* 650 * In the event we have only one worker, shortcut and deliver to it 651 * without further ado. 652 */ 653 if (nws_count == 1) { 654 *cpuidp = nws_array[0]; 655 return (m); 656 } 657 658 /* 659 * What happens next depends on the policy selected by the protocol. 660 * If we want to support per-interface policies, we should do that 661 * here first. 662 */ 663 policy = npp->np_policy; 664 if (policy == NETISR_POLICY_CPU) { 665 m = npp->np_m2cpuid(m, source, cpuidp); 666 if (m == NULL) 667 return (NULL); 668 669 /* 670 * It's possible for a protocol not to have a good idea about 671 * where to process a packet, in which case we fall back on 672 * the netisr code to decide. In the hybrid case, return the 673 * current CPU ID, which will force an immediate direct 674 * dispatch. In the queued case, fall back on the SOURCE 675 * policy. 
676 */ 677 if (*cpuidp != NETISR_CPUID_NONE) 678 return (m); 679 if (dispatch_policy == NETISR_DISPATCH_HYBRID) { 680 *cpuidp = curcpu; 681 return (m); 682 } 683 policy = NETISR_POLICY_SOURCE; 684 } 685 686 if (policy == NETISR_POLICY_FLOW) { 687 if (M_HASHTYPE_GET(m) == M_HASHTYPE_NONE && 688 npp->np_m2flow != NULL) { 689 m = npp->np_m2flow(m, source); 690 if (m == NULL) 691 return (NULL); 692 } 693 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { 694 *cpuidp = 695 netisr_default_flow2cpu(m->m_pkthdr.flowid); 696 return (m); 697 } 698 policy = NETISR_POLICY_SOURCE; 699 } 700 701 KASSERT(policy == NETISR_POLICY_SOURCE, 702 ("%s: invalid policy %u for %s", __func__, npp->np_policy, 703 npp->np_name)); 704 705 ifp = m->m_pkthdr.rcvif; 706 if (ifp != NULL) 707 *cpuidp = nws_array[(ifp->if_index + source) % nws_count]; 708 else 709 *cpuidp = nws_array[source % nws_count]; 710 return (m); 711 } 712 713 /* 714 * Process packets associated with a workstream and protocol. For reasons of 715 * fairness, we process up to one complete netisr queue at a time, moving the 716 * queue to a stack-local queue for processing, but do not loop refreshing 717 * from the global queue. The caller is responsible for deciding whether to 718 * loop, and for setting the NWS_RUNNING flag. The passed workstream will be 719 * locked on entry and relocked before return, but will be released while 720 * processing. The number of packets processed is returned. 
721 */ 722 static u_int 723 netisr_process_workstream_proto(struct netisr_workstream *nwsp, u_int proto) 724 { 725 struct netisr_work local_npw, *npwp; 726 u_int handled; 727 struct mbuf *m; 728 729 NETISR_LOCK_ASSERT(); 730 NWS_LOCK_ASSERT(nwsp); 731 732 KASSERT(nwsp->nws_flags & NWS_RUNNING, 733 ("%s(%u): not running", __func__, proto)); 734 KASSERT(proto >= 0 && proto < NETISR_MAXPROT, 735 ("%s(%u): invalid proto\n", __func__, proto)); 736 737 npwp = &nwsp->nws_work[proto]; 738 if (npwp->nw_len == 0) 739 return (0); 740 741 /* 742 * Move the global work queue to a thread-local work queue. 743 * 744 * Notice that this means the effective maximum length of the queue 745 * is actually twice that of the maximum queue length specified in 746 * the protocol registration call. 747 */ 748 handled = npwp->nw_len; 749 local_npw = *npwp; 750 npwp->nw_head = NULL; 751 npwp->nw_tail = NULL; 752 npwp->nw_len = 0; 753 nwsp->nws_pendingbits &= ~(1 << proto); 754 NWS_UNLOCK(nwsp); 755 while ((m = local_npw.nw_head) != NULL) { 756 local_npw.nw_head = m->m_nextpkt; 757 m->m_nextpkt = NULL; 758 if (local_npw.nw_head == NULL) 759 local_npw.nw_tail = NULL; 760 local_npw.nw_len--; 761 VNET_ASSERT(m->m_pkthdr.rcvif != NULL, 762 ("%s:%d rcvif == NULL: m=%p", __func__, __LINE__, m)); 763 CURVNET_SET(m->m_pkthdr.rcvif->if_vnet); 764 netisr_proto[proto].np_handler(m); 765 CURVNET_RESTORE(); 766 } 767 KASSERT(local_npw.nw_len == 0, 768 ("%s(%u): len %u", __func__, proto, local_npw.nw_len)); 769 if (netisr_proto[proto].np_drainedcpu) 770 netisr_proto[proto].np_drainedcpu(nwsp->nws_cpu); 771 NWS_LOCK(nwsp); 772 npwp->nw_handled += handled; 773 return (handled); 774 } 775 776 /* 777 * SWI handler for netisr -- processes packets in a set of workstreams that 778 * it owns, woken up by calls to NWS_SIGNAL(). If this workstream is already 779 * being direct dispatched, go back to sleep and wait for the dispatching 780 * thread to wake us up again. 
781 */ 782 static void 783 swi_net(void *arg) 784 { 785 #ifdef NETISR_LOCKING 786 struct rm_priotracker tracker; 787 #endif 788 struct netisr_workstream *nwsp; 789 u_int bits, prot; 790 791 nwsp = arg; 792 793 #ifdef DEVICE_POLLING 794 KASSERT(nws_count == 1, 795 ("%s: device_polling but nws_count != 1", __func__)); 796 netisr_poll(); 797 #endif 798 #ifdef NETISR_LOCKING 799 NETISR_RLOCK(&tracker); 800 #endif 801 NWS_LOCK(nwsp); 802 KASSERT(!(nwsp->nws_flags & NWS_RUNNING), ("swi_net: running")); 803 if (nwsp->nws_flags & NWS_DISPATCHING) 804 goto out; 805 nwsp->nws_flags |= NWS_RUNNING; 806 nwsp->nws_flags &= ~NWS_SCHEDULED; 807 while ((bits = nwsp->nws_pendingbits) != 0) { 808 while ((prot = ffs(bits)) != 0) { 809 prot--; 810 bits &= ~(1 << prot); 811 (void)netisr_process_workstream_proto(nwsp, prot); 812 } 813 } 814 nwsp->nws_flags &= ~NWS_RUNNING; 815 out: 816 NWS_UNLOCK(nwsp); 817 #ifdef NETISR_LOCKING 818 NETISR_RUNLOCK(&tracker); 819 #endif 820 #ifdef DEVICE_POLLING 821 netisr_pollmore(); 822 #endif 823 } 824 825 static int 826 netisr_queue_workstream(struct netisr_workstream *nwsp, u_int proto, 827 struct netisr_work *npwp, struct mbuf *m, int *dosignalp) 828 { 829 830 NWS_LOCK_ASSERT(nwsp); 831 832 *dosignalp = 0; 833 if (npwp->nw_len < npwp->nw_qlimit) { 834 m->m_nextpkt = NULL; 835 if (npwp->nw_head == NULL) { 836 npwp->nw_head = m; 837 npwp->nw_tail = m; 838 } else { 839 npwp->nw_tail->m_nextpkt = m; 840 npwp->nw_tail = m; 841 } 842 npwp->nw_len++; 843 if (npwp->nw_len > npwp->nw_watermark) 844 npwp->nw_watermark = npwp->nw_len; 845 846 /* 847 * We must set the bit regardless of NWS_RUNNING, so that 848 * swi_net() keeps calling netisr_process_workstream_proto(). 849 */ 850 nwsp->nws_pendingbits |= (1 << proto); 851 if (!(nwsp->nws_flags & 852 (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED))) { 853 nwsp->nws_flags |= NWS_SCHEDULED; 854 *dosignalp = 1; /* Defer until unlocked. 
*/ 855 } 856 npwp->nw_queued++; 857 return (0); 858 } else { 859 m_freem(m); 860 npwp->nw_qdrops++; 861 return (ENOBUFS); 862 } 863 } 864 865 static int 866 netisr_queue_internal(u_int proto, struct mbuf *m, u_int cpuid) 867 { 868 struct netisr_workstream *nwsp; 869 struct netisr_work *npwp; 870 int dosignal, error; 871 872 #ifdef NETISR_LOCKING 873 NETISR_LOCK_ASSERT(); 874 #endif 875 KASSERT(cpuid <= mp_maxid, ("%s: cpuid too big (%u, %u)", __func__, 876 cpuid, mp_maxid)); 877 KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid)); 878 879 dosignal = 0; 880 error = 0; 881 nwsp = DPCPU_ID_PTR(cpuid, nws); 882 npwp = &nwsp->nws_work[proto]; 883 NWS_LOCK(nwsp); 884 error = netisr_queue_workstream(nwsp, proto, npwp, m, &dosignal); 885 NWS_UNLOCK(nwsp); 886 if (dosignal) 887 NWS_SIGNAL(nwsp); 888 return (error); 889 } 890 891 int 892 netisr_queue_src(u_int proto, uintptr_t source, struct mbuf *m) 893 { 894 #ifdef NETISR_LOCKING 895 struct rm_priotracker tracker; 896 #endif 897 u_int cpuid; 898 int error; 899 900 KASSERT(proto < NETISR_MAXPROT, 901 ("%s: invalid proto %u", __func__, proto)); 902 903 #ifdef NETISR_LOCKING 904 NETISR_RLOCK(&tracker); 905 #endif 906 KASSERT(netisr_proto[proto].np_handler != NULL, 907 ("%s: invalid proto %u", __func__, proto)); 908 909 m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_DEFERRED, 910 source, m, &cpuid); 911 if (m != NULL) { 912 KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, 913 cpuid)); 914 error = netisr_queue_internal(proto, m, cpuid); 915 } else 916 error = ENOBUFS; 917 #ifdef NETISR_LOCKING 918 NETISR_RUNLOCK(&tracker); 919 #endif 920 return (error); 921 } 922 923 int 924 netisr_queue(u_int proto, struct mbuf *m) 925 { 926 927 return (netisr_queue_src(proto, 0, m)); 928 } 929 930 /* 931 * Dispatch a packet for netisr processing; direct dispatch is permitted by 932 * calling context. 
933 */ 934 int 935 netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m) 936 { 937 #ifdef NETISR_LOCKING 938 struct rm_priotracker tracker; 939 #endif 940 struct netisr_workstream *nwsp; 941 struct netisr_proto *npp; 942 struct netisr_work *npwp; 943 int dosignal, error; 944 u_int cpuid, dispatch_policy; 945 946 KASSERT(proto < NETISR_MAXPROT, 947 ("%s: invalid proto %u", __func__, proto)); 948 #ifdef NETISR_LOCKING 949 NETISR_RLOCK(&tracker); 950 #endif 951 npp = &netisr_proto[proto]; 952 KASSERT(npp->np_handler != NULL, ("%s: invalid proto %u", __func__, 953 proto)); 954 955 dispatch_policy = netisr_get_dispatch(npp); 956 if (dispatch_policy == NETISR_DISPATCH_DEFERRED) 957 return (netisr_queue_src(proto, source, m)); 958 959 /* 960 * If direct dispatch is forced, then unconditionally dispatch 961 * without a formal CPU selection. Borrow the current CPU's stats, 962 * even if there's no worker on it. In this case we don't update 963 * nws_flags because all netisr processing will be source ordered due 964 * to always being forced to directly dispatch. 965 */ 966 if (dispatch_policy == NETISR_DISPATCH_DIRECT) { 967 nwsp = DPCPU_PTR(nws); 968 npwp = &nwsp->nws_work[proto]; 969 npwp->nw_dispatched++; 970 npwp->nw_handled++; 971 netisr_proto[proto].np_handler(m); 972 error = 0; 973 goto out_unlock; 974 } 975 976 KASSERT(dispatch_policy == NETISR_DISPATCH_HYBRID, 977 ("%s: unknown dispatch policy (%u)", __func__, dispatch_policy)); 978 979 /* 980 * Otherwise, we execute in a hybrid mode where we will try to direct 981 * dispatch if we're on the right CPU and the netisr worker isn't 982 * already running. 
983 */ 984 sched_pin(); 985 m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_HYBRID, 986 source, m, &cpuid); 987 if (m == NULL) { 988 error = ENOBUFS; 989 goto out_unpin; 990 } 991 KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid)); 992 if (cpuid != curcpu) 993 goto queue_fallback; 994 nwsp = DPCPU_PTR(nws); 995 npwp = &nwsp->nws_work[proto]; 996 997 /*- 998 * We are willing to direct dispatch only if three conditions hold: 999 * 1000 * (1) The netisr worker isn't already running, 1001 * (2) Another thread isn't already directly dispatching, and 1002 * (3) The netisr hasn't already been woken up. 1003 */ 1004 NWS_LOCK(nwsp); 1005 if (nwsp->nws_flags & (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED)) { 1006 error = netisr_queue_workstream(nwsp, proto, npwp, m, 1007 &dosignal); 1008 NWS_UNLOCK(nwsp); 1009 if (dosignal) 1010 NWS_SIGNAL(nwsp); 1011 goto out_unpin; 1012 } 1013 1014 /* 1015 * The current thread is now effectively the netisr worker, so set 1016 * the dispatching flag to prevent concurrent processing of the 1017 * stream from another thread (even the netisr worker), which could 1018 * otherwise lead to effective misordering of the stream. 1019 */ 1020 nwsp->nws_flags |= NWS_DISPATCHING; 1021 NWS_UNLOCK(nwsp); 1022 netisr_proto[proto].np_handler(m); 1023 NWS_LOCK(nwsp); 1024 nwsp->nws_flags &= ~NWS_DISPATCHING; 1025 npwp->nw_handled++; 1026 npwp->nw_hybrid_dispatched++; 1027 1028 /* 1029 * If other work was enqueued by another thread while we were direct 1030 * dispatching, we need to signal the netisr worker to do that work. 1031 * In the future, we might want to do some of that work in the 1032 * current thread, rather than trigger further context switches. If 1033 * so, we'll want to establish a reasonable bound on the work done in 1034 * the "borrowed" context. 
1035 */ 1036 if (nwsp->nws_pendingbits != 0) { 1037 nwsp->nws_flags |= NWS_SCHEDULED; 1038 dosignal = 1; 1039 } else 1040 dosignal = 0; 1041 NWS_UNLOCK(nwsp); 1042 if (dosignal) 1043 NWS_SIGNAL(nwsp); 1044 error = 0; 1045 goto out_unpin; 1046 1047 queue_fallback: 1048 error = netisr_queue_internal(proto, m, cpuid); 1049 out_unpin: 1050 sched_unpin(); 1051 out_unlock: 1052 #ifdef NETISR_LOCKING 1053 NETISR_RUNLOCK(&tracker); 1054 #endif 1055 return (error); 1056 } 1057 1058 int 1059 netisr_dispatch(u_int proto, struct mbuf *m) 1060 { 1061 1062 return (netisr_dispatch_src(proto, 0, m)); 1063 } 1064 1065 #ifdef DEVICE_POLLING 1066 /* 1067 * Kernel polling borrows a netisr thread to run interface polling in; this 1068 * function allows kernel polling to request that the netisr thread be 1069 * scheduled even if no packets are pending for protocols. 1070 */ 1071 void 1072 netisr_sched_poll(void) 1073 { 1074 struct netisr_workstream *nwsp; 1075 1076 nwsp = DPCPU_ID_PTR(nws_array[0], nws); 1077 NWS_SIGNAL(nwsp); 1078 } 1079 #endif 1080 1081 static void 1082 netisr_start_swi(u_int cpuid, struct pcpu *pc) 1083 { 1084 char swiname[12]; 1085 struct netisr_workstream *nwsp; 1086 int error; 1087 1088 KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid)); 1089 1090 nwsp = DPCPU_ID_PTR(cpuid, nws); 1091 mtx_init(&nwsp->nws_mtx, "netisr_mtx", NULL, MTX_DEF); 1092 nwsp->nws_cpu = cpuid; 1093 snprintf(swiname, sizeof(swiname), "netisr %u", cpuid); 1094 error = swi_add(&nwsp->nws_intr_event, swiname, swi_net, nwsp, 1095 SWI_NET, INTR_MPSAFE, &nwsp->nws_swi_cookie); 1096 if (error) 1097 panic("%s: swi_add %d", __func__, error); 1098 pc->pc_netisr = nwsp->nws_intr_event; 1099 if (netisr_bindthreads) { 1100 error = intr_event_bind(nwsp->nws_intr_event, cpuid); 1101 if (error != 0) 1102 printf("%s: cpu %u: intr_event_bind: %d", __func__, 1103 cpuid, error); 1104 } 1105 NETISR_WLOCK(); 1106 nws_array[nws_count] = nwsp->nws_cpu; 1107 nws_count++; 1108 NETISR_WUNLOCK(); 1109 } 
1110 1111 /* 1112 * Initialize the netisr subsystem. We rely on BSS and static initialization 1113 * of most fields in global data structures. 1114 * 1115 * Start a worker thread for the boot CPU so that we can support network 1116 * traffic immediately in case the network stack is used before additional 1117 * CPUs are started (for example, diskless boot). 1118 */ 1119 static void 1120 netisr_init(void *arg) 1121 { 1122 KASSERT(curcpu == 0, ("%s: not on CPU 0", __func__)); 1123 1124 NETISR_LOCK_INIT(); 1125 if (netisr_maxthreads == 0 || netisr_maxthreads < -1 ) 1126 netisr_maxthreads = 1; /* default behavior */ 1127 else if (netisr_maxthreads == -1) 1128 netisr_maxthreads = mp_ncpus; /* use max cpus */ 1129 if (netisr_maxthreads > mp_ncpus) { 1130 printf("netisr_init: forcing maxthreads from %d to %d\n", 1131 netisr_maxthreads, mp_ncpus); 1132 netisr_maxthreads = mp_ncpus; 1133 } 1134 if (netisr_defaultqlimit > netisr_maxqlimit) { 1135 printf("netisr_init: forcing defaultqlimit from %d to %d\n", 1136 netisr_defaultqlimit, netisr_maxqlimit); 1137 netisr_defaultqlimit = netisr_maxqlimit; 1138 } 1139 #ifdef DEVICE_POLLING 1140 /* 1141 * The device polling code is not yet aware of how to deal with 1142 * multiple netisr threads, so for the time being compiling in device 1143 * polling disables parallel netisr workers. 1144 */ 1145 if (netisr_maxthreads != 1 || netisr_bindthreads != 0) { 1146 printf("netisr_init: forcing maxthreads to 1 and " 1147 "bindthreads to 0 for device polling\n"); 1148 netisr_maxthreads = 1; 1149 netisr_bindthreads = 0; 1150 } 1151 #endif 1152 netisr_start_swi(curcpu, pcpu_find(curcpu)); 1153 } 1154 SYSINIT(netisr_init, SI_SUB_SOFTINTR, SI_ORDER_FIRST, netisr_init, NULL); 1155 1156 /* 1157 * Start worker threads for additional CPUs. No attempt to gracefully handle 1158 * work reassignment, we don't yet support dynamic reconfiguration. 
1159 */ 1160 static void 1161 netisr_start(void *arg) 1162 { 1163 struct pcpu *pc; 1164 1165 STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { 1166 if (nws_count >= netisr_maxthreads) 1167 break; 1168 /* Worker will already be present for boot CPU. */ 1169 if (pc->pc_netisr != NULL) 1170 continue; 1171 netisr_start_swi(pc->pc_cpuid, pc); 1172 } 1173 } 1174 SYSINIT(netisr_start, SI_SUB_SMP, SI_ORDER_MIDDLE, netisr_start, NULL); 1175 1176 /* 1177 * Sysctl monitoring for netisr: query a list of registered protocols. 1178 */ 1179 static int 1180 sysctl_netisr_proto(SYSCTL_HANDLER_ARGS) 1181 { 1182 struct rm_priotracker tracker; 1183 struct sysctl_netisr_proto *snpp, *snp_array; 1184 struct netisr_proto *npp; 1185 u_int counter, proto; 1186 int error; 1187 1188 if (req->newptr != NULL) 1189 return (EINVAL); 1190 snp_array = malloc(sizeof(*snp_array) * NETISR_MAXPROT, M_TEMP, 1191 M_ZERO | M_WAITOK); 1192 counter = 0; 1193 NETISR_RLOCK(&tracker); 1194 for (proto = 0; proto < NETISR_MAXPROT; proto++) { 1195 npp = &netisr_proto[proto]; 1196 if (npp->np_name == NULL) 1197 continue; 1198 snpp = &snp_array[counter]; 1199 snpp->snp_version = sizeof(*snpp); 1200 strlcpy(snpp->snp_name, npp->np_name, NETISR_NAMEMAXLEN); 1201 snpp->snp_proto = proto; 1202 snpp->snp_qlimit = npp->np_qlimit; 1203 snpp->snp_policy = npp->np_policy; 1204 snpp->snp_dispatch = npp->np_dispatch; 1205 if (npp->np_m2flow != NULL) 1206 snpp->snp_flags |= NETISR_SNP_FLAGS_M2FLOW; 1207 if (npp->np_m2cpuid != NULL) 1208 snpp->snp_flags |= NETISR_SNP_FLAGS_M2CPUID; 1209 if (npp->np_drainedcpu != NULL) 1210 snpp->snp_flags |= NETISR_SNP_FLAGS_DRAINEDCPU; 1211 counter++; 1212 } 1213 NETISR_RUNLOCK(&tracker); 1214 KASSERT(counter <= NETISR_MAXPROT, 1215 ("sysctl_netisr_proto: counter too big (%d)", counter)); 1216 error = SYSCTL_OUT(req, snp_array, sizeof(*snp_array) * counter); 1217 free(snp_array, M_TEMP); 1218 return (error); 1219 } 1220 1221 SYSCTL_PROC(_net_isr, OID_AUTO, proto, 1222 
CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_proto, 1223 "S,sysctl_netisr_proto", 1224 "Return list of protocols registered with netisr"); 1225 1226 /* 1227 * Sysctl monitoring for netisr: query a list of workstreams. 1228 */ 1229 static int 1230 sysctl_netisr_workstream(SYSCTL_HANDLER_ARGS) 1231 { 1232 struct rm_priotracker tracker; 1233 struct sysctl_netisr_workstream *snwsp, *snws_array; 1234 struct netisr_workstream *nwsp; 1235 u_int counter, cpuid; 1236 int error; 1237 1238 if (req->newptr != NULL) 1239 return (EINVAL); 1240 snws_array = malloc(sizeof(*snws_array) * MAXCPU, M_TEMP, 1241 M_ZERO | M_WAITOK); 1242 counter = 0; 1243 NETISR_RLOCK(&tracker); 1244 CPU_FOREACH(cpuid) { 1245 nwsp = DPCPU_ID_PTR(cpuid, nws); 1246 if (nwsp->nws_intr_event == NULL) 1247 continue; 1248 NWS_LOCK(nwsp); 1249 snwsp = &snws_array[counter]; 1250 snwsp->snws_version = sizeof(*snwsp); 1251 1252 /* 1253 * For now, we equate workstream IDs and CPU IDs in the 1254 * kernel, but expose them independently to userspace in case 1255 * that assumption changes in the future. 1256 */ 1257 snwsp->snws_wsid = cpuid; 1258 snwsp->snws_cpu = cpuid; 1259 if (nwsp->nws_intr_event != NULL) 1260 snwsp->snws_flags |= NETISR_SNWS_FLAGS_INTR; 1261 NWS_UNLOCK(nwsp); 1262 counter++; 1263 } 1264 NETISR_RUNLOCK(&tracker); 1265 KASSERT(counter <= MAXCPU, 1266 ("sysctl_netisr_workstream: counter too big (%d)", counter)); 1267 error = SYSCTL_OUT(req, snws_array, sizeof(*snws_array) * counter); 1268 free(snws_array, M_TEMP); 1269 return (error); 1270 } 1271 1272 SYSCTL_PROC(_net_isr, OID_AUTO, workstream, 1273 CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_workstream, 1274 "S,sysctl_netisr_workstream", 1275 "Return list of workstreams implemented by netisr"); 1276 1277 /* 1278 * Sysctl monitoring for netisr: query per-protocol data across all 1279 * workstreams. 
1280 */ 1281 static int 1282 sysctl_netisr_work(SYSCTL_HANDLER_ARGS) 1283 { 1284 struct rm_priotracker tracker; 1285 struct sysctl_netisr_work *snwp, *snw_array; 1286 struct netisr_workstream *nwsp; 1287 struct netisr_proto *npp; 1288 struct netisr_work *nwp; 1289 u_int counter, cpuid, proto; 1290 int error; 1291 1292 if (req->newptr != NULL) 1293 return (EINVAL); 1294 snw_array = malloc(sizeof(*snw_array) * MAXCPU * NETISR_MAXPROT, 1295 M_TEMP, M_ZERO | M_WAITOK); 1296 counter = 0; 1297 NETISR_RLOCK(&tracker); 1298 CPU_FOREACH(cpuid) { 1299 nwsp = DPCPU_ID_PTR(cpuid, nws); 1300 if (nwsp->nws_intr_event == NULL) 1301 continue; 1302 NWS_LOCK(nwsp); 1303 for (proto = 0; proto < NETISR_MAXPROT; proto++) { 1304 npp = &netisr_proto[proto]; 1305 if (npp->np_name == NULL) 1306 continue; 1307 nwp = &nwsp->nws_work[proto]; 1308 snwp = &snw_array[counter]; 1309 snwp->snw_version = sizeof(*snwp); 1310 snwp->snw_wsid = cpuid; /* See comment above. */ 1311 snwp->snw_proto = proto; 1312 snwp->snw_len = nwp->nw_len; 1313 snwp->snw_watermark = nwp->nw_watermark; 1314 snwp->snw_dispatched = nwp->nw_dispatched; 1315 snwp->snw_hybrid_dispatched = 1316 nwp->nw_hybrid_dispatched; 1317 snwp->snw_qdrops = nwp->nw_qdrops; 1318 snwp->snw_queued = nwp->nw_queued; 1319 snwp->snw_handled = nwp->nw_handled; 1320 counter++; 1321 } 1322 NWS_UNLOCK(nwsp); 1323 } 1324 KASSERT(counter <= MAXCPU * NETISR_MAXPROT, 1325 ("sysctl_netisr_work: counter too big (%d)", counter)); 1326 NETISR_RUNLOCK(&tracker); 1327 error = SYSCTL_OUT(req, snw_array, sizeof(*snw_array) * counter); 1328 free(snw_array, M_TEMP); 1329 return (error); 1330 } 1331 1332 SYSCTL_PROC(_net_isr, OID_AUTO, work, 1333 CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_work, 1334 "S,sysctl_netisr_work", 1335 "Return list of per-workstream, per-protocol work in netisr"); 1336 1337 #ifdef DDB 1338 DB_SHOW_COMMAND(netisr, db_show_netisr) 1339 { 1340 struct netisr_workstream *nwsp; 1341 struct netisr_work *nwp; 1342 int first, 
proto; 1343 u_int cpuid; 1344 1345 db_printf("%3s %6s %5s %5s %5s %8s %8s %8s %8s\n", "CPU", "Proto", 1346 "Len", "WMark", "Max", "Disp", "HDisp", "Drop", "Queue"); 1347 CPU_FOREACH(cpuid) { 1348 nwsp = DPCPU_ID_PTR(cpuid, nws); 1349 if (nwsp->nws_intr_event == NULL) 1350 continue; 1351 first = 1; 1352 for (proto = 0; proto < NETISR_MAXPROT; proto++) { 1353 if (netisr_proto[proto].np_handler == NULL) 1354 continue; 1355 nwp = &nwsp->nws_work[proto]; 1356 if (first) { 1357 db_printf("%3d ", cpuid); 1358 first = 0; 1359 } else 1360 db_printf("%3s ", ""); 1361 db_printf( 1362 "%6s %5d %5d %5d %8ju %8ju %8ju %8ju\n", 1363 netisr_proto[proto].np_name, nwp->nw_len, 1364 nwp->nw_watermark, nwp->nw_qlimit, 1365 nwp->nw_dispatched, nwp->nw_hybrid_dispatched, 1366 nwp->nw_qdrops, nwp->nw_queued); 1367 } 1368 } 1369 } 1370 #endif 1371