/*-
 * Copyright (c) 2007-2009 Robert N. M. Watson
 * Copyright (c) 2010-2011 Juniper Networks, Inc.
 * All rights reserved.
 *
 * This software was developed by Robert N. M. Watson under contract
 * to Juniper Networks, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/*
 * netisr is a packet dispatch service, allowing synchronous (directly
 * dispatched) and asynchronous (deferred dispatch) processing of packets by
 * registered protocol handlers.  Callers pass a protocol identifier and
 * packet to netisr, along with a direct dispatch hint, and work will either
 * be immediately processed by the registered handler, or passed to a
 * software interrupt (SWI) thread for deferred dispatch.  Callers will
 * generally select one or the other based on:
 *
 * - Whether directly dispatching a netisr handler would lead to code
 *   reentrance or lock recursion, such as entering the socket code from the
 *   socket code.
 * - Whether directly dispatching a netisr handler would lead to recursive
 *   processing, such as when decapsulating several wrapped layers of tunnel
 *   information (IPSEC within IPSEC within ...).
 *
 * Maintaining ordering for protocol streams is a critical design concern.
 * Enforcing ordering limits the opportunity for concurrency, but maintains
 * the strong ordering requirements found in some protocols, such as TCP.  Of
 * related concern is CPU affinity--it is desirable to process all data
 * associated with a particular stream on the same CPU over time in order to
 * avoid acquiring locks associated with the connection on different CPUs,
 * keep connection data in one cache, and to generally encourage associated
 * user threads to live on the same CPU as the stream.  It's also desirable
 * to avoid lock migration and contention where locks are associated with
 * more than one flow.
 *
 * netisr supports several policy variations, represented by the
 * NETISR_POLICY_* constants, allowing protocols to play various roles in
 * identifying flows, assigning work to CPUs, etc.  These are described in
 * netisr.h.
 */
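/*
 * Typical use (illustrative sketch only; the "foo" names and NETISR_FOO are
 * hypothetical and not part of this file): a protocol fills out a struct
 * netisr_handler and calls netisr_register() at initialization time, input
 * paths then hand packets to netisr_dispatch() or netisr_queue(), and a
 * loadable protocol calls netisr_unregister() before unload:
 *
 *	netisr_register(&foo_nh);
 *	...
 *	error = netisr_dispatch(NETISR_FOO, m);
 *	...
 *	netisr_unregister(&foo_nh);
 */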
#include "opt_ddb.h"
#include "opt_device_polling.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/interrupt.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/rmlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#define	_WANT_NETISR_INTERNAL	/* Enable definitions from netisr_internal.h */
#include <net/if.h>
#include <net/if_var.h>
#include <net/netisr.h>
#include <net/netisr_internal.h>
#include <net/vnet.h>

/*-
 * Synchronize use and modification of the registered netisr data structures;
 * acquire a write lock while modifying the set of registered protocols to
 * prevent partially registered or unregistered protocols from being run.
 *
 * The following data structures and fields are protected by this lock:
 *
 * - The netisr_proto array, including all fields of struct netisr_proto.
 * - The nws array, including all fields of struct netisr_workstream.
 * - The nws_array array.
 *
 * Note: the NETISR_LOCKING define controls whether read locks are acquired
 * in packet processing paths requiring netisr registration stability.  This
 * is disabled by default as it can lead to measurable performance
 * degradation even with rmlocks (3%-6% for loopback ping-pong traffic), and
 * because netisr registration and unregistration is extremely rare at
 * runtime.  If it becomes more common, this decision should be revisited.
 *
 * XXXRW: rmlocks don't support assertions.
 */
static struct rmlock	netisr_rmlock;
#define	NETISR_LOCK_INIT()	rm_init_flags(&netisr_rmlock, "netisr", \
				    RM_NOWITNESS)
#define	NETISR_LOCK_ASSERT()
#define	NETISR_RLOCK(tracker)	rm_rlock(&netisr_rmlock, (tracker))
#define	NETISR_RUNLOCK(tracker)	rm_runlock(&netisr_rmlock, (tracker))
#define	NETISR_WLOCK()		rm_wlock(&netisr_rmlock)
#define	NETISR_WUNLOCK()	rm_wunlock(&netisr_rmlock)
/* #define	NETISR_LOCKING */

static SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr");

/*-
 * Three global direct dispatch policies are supported:
 *
 * NETISR_DISPATCH_DEFERRED: All work is deferred for a netisr, regardless of
 * context (may be overridden by protocols).
 *
 * NETISR_DISPATCH_HYBRID: If the executing context allows direct dispatch,
 * and we're running on the CPU the work would be performed on, then direct
 * dispatch it if it wouldn't violate ordering constraints on the workstream.
 *
 * NETISR_DISPATCH_DIRECT: If the executing context allows direct dispatch,
 * always direct dispatch.  (The default.)
 *
 * Notice that changing the global policy could lead to short periods of
 * misordered processing, but this is considered acceptable as compared to
 * the complexity of enforcing ordering during policy changes.  Protocols can
 * override the global policy (when they're not doing that, they select
 * NETISR_DISPATCH_DEFAULT).
 */
#define	NETISR_DISPATCH_POLICY_DEFAULT	NETISR_DISPATCH_DIRECT
#define	NETISR_DISPATCH_POLICY_MAXSTR	20 /* Used for temporary buffers. */
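/*
 * The global policy is selected by the administrator through the
 * net.isr.dispatch tunable/sysctl defined below, e.g. (sketch; any of the
 * strings in the dispatch table further down is accepted):
 *
 *	# sysctl net.isr.dispatch=deferred
 *	# echo 'net.isr.dispatch="hybrid"' >> /boot/loader.conf
 */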
static u_int	netisr_dispatch_policy = NETISR_DISPATCH_POLICY_DEFAULT;
static int	sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_net_isr, OID_AUTO, dispatch, CTLTYPE_STRING | CTLFLAG_RWTUN,
    0, 0, sysctl_netisr_dispatch_policy, "A",
    "netisr dispatch policy");

/*
 * Allow the administrator to limit the number of threads (CPUs) to use for
 * netisr.  We don't check netisr_maxthreads before creating the thread for
 * CPU 0, so in practice we ignore values <= 1.  This must be set at boot.
 * We will create at most one thread per CPU.
 */
static int	netisr_maxthreads = -1;		/* Max number of threads. */
SYSCTL_INT(_net_isr, OID_AUTO, maxthreads, CTLFLAG_RDTUN,
    &netisr_maxthreads, 0,
    "Use at most this many CPUs for netisr processing");

static int	netisr_bindthreads = 0;		/* Bind threads to CPUs. */
SYSCTL_INT(_net_isr, OID_AUTO, bindthreads, CTLFLAG_RDTUN,
    &netisr_bindthreads, 0, "Bind netisr threads to CPUs.");

/*
 * Limit per-workstream mbuf queue limits to at most net.isr.maxqlimit, both
 * for initial configuration and later modification using
 * netisr_setqlimit().
 */
#define	NETISR_DEFAULT_MAXQLIMIT	10240
static u_int	netisr_maxqlimit = NETISR_DEFAULT_MAXQLIMIT;
SYSCTL_UINT(_net_isr, OID_AUTO, maxqlimit, CTLFLAG_RDTUN,
    &netisr_maxqlimit, 0,
    "Maximum netisr per-protocol, per-CPU queue depth.");

/*
 * The default per-workstream mbuf queue limit for protocols that don't
 * initialize the nh_qlimit field of their struct netisr_handler.  If this is
 * set above netisr_maxqlimit, we truncate it to the maximum during boot.
 */
#define	NETISR_DEFAULT_DEFAULTQLIMIT	256
static u_int	netisr_defaultqlimit = NETISR_DEFAULT_DEFAULTQLIMIT;
SYSCTL_UINT(_net_isr, OID_AUTO, defaultqlimit, CTLFLAG_RDTUN,
    &netisr_defaultqlimit, 0,
    "Default netisr per-protocol, per-CPU queue limit if not set by protocol");

/*
 * Store and export the compile-time constant NETISR_MAXPROT limit on the
 * number of protocols that can register with netisr at a time.  This is
 * required for crashdump analysis, as it sizes netisr_proto[].
 */
static u_int	netisr_maxprot = NETISR_MAXPROT;
SYSCTL_UINT(_net_isr, OID_AUTO, maxprot, CTLFLAG_RD,
    &netisr_maxprot, 0,
    "Compile-time limit on the number of protocols supported by netisr.");

/*
 * The netisr_proto array describes all registered protocols, indexed by
 * protocol number.  See netisr_internal.h for more details.
 */
static struct netisr_proto	netisr_proto[NETISR_MAXPROT];

/*
 * Per-CPU workstream data.  See netisr_internal.h for more details.
 */
DPCPU_DEFINE(struct netisr_workstream, nws);

/*
 * Map contiguous values between 0 and nws_count into CPU IDs appropriate for
 * accessing workstreams.  This allows constructions of the form
 * DPCPU_ID_GET(nws_array[arbitraryvalue % nws_count], nws).
 */
static u_int	nws_array[MAXCPU];

/*
 * Number of registered workstreams.  Will be at most the number of running
 * CPUs once fully started.
 */
static u_int	nws_count;
SYSCTL_UINT(_net_isr, OID_AUTO, numthreads, CTLFLAG_RD,
    &nws_count, 0, "Number of extant netisr threads.");

/*
 * Synchronization for each workstream: a mutex protects all mutable fields
 * in each stream, including per-protocol state (mbuf queues).  The SWI is
 * woken up if asynchronous dispatch is required.
 */
#define	NWS_LOCK(s)		mtx_lock(&(s)->nws_mtx)
#define	NWS_LOCK_ASSERT(s)	mtx_assert(&(s)->nws_mtx, MA_OWNED)
#define	NWS_UNLOCK(s)		mtx_unlock(&(s)->nws_mtx)
#define	NWS_SIGNAL(s)		swi_sched((s)->nws_swi_cookie, 0)

/*
 * Utility routines for protocols that implement their own mapping of flows
 * to CPUs.
 */
u_int
netisr_get_cpucount(void)
{

	return (nws_count);
}

u_int
netisr_get_cpuid(u_int cpunumber)
{

	KASSERT(cpunumber < nws_count, ("%s: %u > %u", __func__, cpunumber,
	    nws_count));

	return (nws_array[cpunumber]);
}

/*
 * The default implementation of flow -> CPU ID mapping.
 *
 * Non-static so that protocols can use it to map their own work to specific
 * CPUs in a manner consistent with netisr for affinity purposes.
 */
u_int
netisr_default_flow2cpu(u_int flowid)
{

	return (nws_array[flowid % nws_count]);
}

/*
 * Dispatch tunable and sysctl configuration.
 */
struct netisr_dispatch_table_entry {
	u_int		 ndte_policy;
	const char	*ndte_policy_str;
};
static const struct netisr_dispatch_table_entry netisr_dispatch_table[] = {
	{ NETISR_DISPATCH_DEFAULT, "default" },
	{ NETISR_DISPATCH_DEFERRED, "deferred" },
	{ NETISR_DISPATCH_HYBRID, "hybrid" },
	{ NETISR_DISPATCH_DIRECT, "direct" },
};
static const u_int netisr_dispatch_table_len =
    (sizeof(netisr_dispatch_table) / sizeof(netisr_dispatch_table[0]));

static void
netisr_dispatch_policy_to_str(u_int dispatch_policy, char *buffer,
    u_int buflen)
{
	const struct netisr_dispatch_table_entry *ndtep;
	const char *str;
	u_int i;

	str = "unknown";
	for (i = 0; i < netisr_dispatch_table_len; i++) {
		ndtep = &netisr_dispatch_table[i];
		if (ndtep->ndte_policy == dispatch_policy) {
			str = ndtep->ndte_policy_str;
			break;
		}
	}
	snprintf(buffer, buflen, "%s", str);
}

static int
netisr_dispatch_policy_from_str(const char *str, u_int *dispatch_policyp)
{
	const struct netisr_dispatch_table_entry *ndtep;
	u_int i;

	for (i = 0; i < netisr_dispatch_table_len; i++) {
		ndtep = &netisr_dispatch_table[i];
		if (strcmp(ndtep->ndte_policy_str, str) == 0) {
			*dispatch_policyp = ndtep->ndte_policy;
			return (0);
		}
	}
	return (EINVAL);
}

static int
sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS)
{
	char tmp[NETISR_DISPATCH_POLICY_MAXSTR];
	u_int dispatch_policy;
	int error;

	netisr_dispatch_policy_to_str(netisr_dispatch_policy, tmp,
	    sizeof(tmp));
	error = sysctl_handle_string(oidp, tmp, sizeof(tmp), req);
	if (error == 0 && req->newptr != NULL) {
		error = netisr_dispatch_policy_from_str(tmp,
		    &dispatch_policy);
		if (error == 0 && dispatch_policy == NETISR_DISPATCH_DEFAULT)
			error = EINVAL;
		if (error == 0)
			netisr_dispatch_policy = dispatch_policy;
	}
	return (error);
}
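/*
 * An illustrative registration for a hypothetical protocol "foo" (the
 * NETISR_FOO constant and foo_input() handler are placeholders, not part of
 * this file); fields left at zero, such as nh_qlimit here, pick up the
 * defaults applied in netisr_register() below:
 *
 *	static struct netisr_handler foo_nh = {
 *		.nh_name = "foo",
 *		.nh_handler = foo_input,
 *		.nh_proto = NETISR_FOO,
 *		.nh_policy = NETISR_POLICY_FLOW,
 *	};
 *	...
 *	netisr_register(&foo_nh);
 */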
/*
 * Register a new netisr handler, which requires initializing per-protocol
 * fields for each workstream.  All netisr work is briefly suspended while
 * the protocol is installed.
 */
void
netisr_register(const struct netisr_handler *nhp)
{
	struct netisr_work *npwp;
	const char *name;
	u_int i, proto;

	proto = nhp->nh_proto;
	name = nhp->nh_name;

	/*
	 * Test that the requested registration is valid.
	 */
	KASSERT(nhp->nh_name != NULL,
	    ("%s: nh_name NULL for %u", __func__, proto));
	KASSERT(nhp->nh_handler != NULL,
	    ("%s: nh_handler NULL for %s", __func__, name));
	KASSERT(nhp->nh_policy == NETISR_POLICY_SOURCE ||
	    nhp->nh_policy == NETISR_POLICY_FLOW ||
	    nhp->nh_policy == NETISR_POLICY_CPU,
	    ("%s: unsupported nh_policy %u for %s", __func__,
	    nhp->nh_policy, name));
	KASSERT(nhp->nh_policy == NETISR_POLICY_FLOW ||
	    nhp->nh_m2flow == NULL,
	    ("%s: nh_policy != FLOW but m2flow defined for %s", __func__,
	    name));
	KASSERT(nhp->nh_policy == NETISR_POLICY_CPU || nhp->nh_m2cpuid == NULL,
	    ("%s: nh_policy != CPU but m2cpuid defined for %s", __func__,
	    name));
	KASSERT(nhp->nh_policy != NETISR_POLICY_CPU || nhp->nh_m2cpuid != NULL,
	    ("%s: nh_policy == CPU but m2cpuid not defined for %s", __func__,
	    name));
	KASSERT(nhp->nh_dispatch == NETISR_DISPATCH_DEFAULT ||
	    nhp->nh_dispatch == NETISR_DISPATCH_DEFERRED ||
	    nhp->nh_dispatch == NETISR_DISPATCH_HYBRID ||
	    nhp->nh_dispatch == NETISR_DISPATCH_DIRECT,
	    ("%s: invalid nh_dispatch (%u)", __func__, nhp->nh_dispatch));

	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u, %s): protocol too big", __func__, proto, name));

	/*
	 * Test that no existing registration exists for this protocol.
	 */
	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_name == NULL,
	    ("%s(%u, %s): name present", __func__, proto, name));
	KASSERT(netisr_proto[proto].np_handler == NULL,
	    ("%s(%u, %s): handler present", __func__, proto, name));

	netisr_proto[proto].np_name = name;
	netisr_proto[proto].np_handler = nhp->nh_handler;
	netisr_proto[proto].np_m2flow = nhp->nh_m2flow;
	netisr_proto[proto].np_m2cpuid = nhp->nh_m2cpuid;
	netisr_proto[proto].np_drainedcpu = nhp->nh_drainedcpu;
	if (nhp->nh_qlimit == 0)
		netisr_proto[proto].np_qlimit = netisr_defaultqlimit;
	else if (nhp->nh_qlimit > netisr_maxqlimit) {
		printf("%s: %s requested queue limit %u capped to "
		    "net.isr.maxqlimit %u\n", __func__, name, nhp->nh_qlimit,
		    netisr_maxqlimit);
		netisr_proto[proto].np_qlimit = netisr_maxqlimit;
	} else
		netisr_proto[proto].np_qlimit = nhp->nh_qlimit;
	netisr_proto[proto].np_policy = nhp->nh_policy;
	netisr_proto[proto].np_dispatch = nhp->nh_dispatch;
	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		bzero(npwp, sizeof(*npwp));
		npwp->nw_qlimit = netisr_proto[proto].np_qlimit;
	}
	NETISR_WUNLOCK();
}

/*
 * Clear drop counters across all workstreams for a protocol.
 */
void
netisr_clearqdrops(const struct netisr_handler *nhp)
{
	struct netisr_work *npwp;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		npwp->nw_qdrops = 0;
	}
	NETISR_WUNLOCK();
}

/*
 * Query current drop counters across all workstreams for a protocol.
 */
void
netisr_getqdrops(const struct netisr_handler *nhp, u_int64_t *qdropp)
{
	struct netisr_work *npwp;
	struct rm_priotracker tracker;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	*qdropp = 0;
	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_RLOCK(&tracker);
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		*qdropp += npwp->nw_qdrops;
	}
	NETISR_RUNLOCK(&tracker);
}

/*
 * Query current per-workstream queue limit for a protocol.
 */
void
netisr_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp)
{
	struct rm_priotracker tracker;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int proto;

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_RLOCK(&tracker);
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));
	*qlimitp = netisr_proto[proto].np_qlimit;
	NETISR_RUNLOCK(&tracker);
}

/*
 * Update the queue limit across per-workstream queues for a protocol.  We
 * simply change the limits, and don't drain overflowed packets as they will
 * (hopefully) take care of themselves shortly.
 */
int
netisr_setqlimit(const struct netisr_handler *nhp, u_int qlimit)
{
	struct netisr_work *npwp;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	if (qlimit > netisr_maxqlimit)
		return (EINVAL);

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	netisr_proto[proto].np_qlimit = qlimit;
	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		npwp->nw_qlimit = qlimit;
	}
	NETISR_WUNLOCK();
	return (0);
}

/*
 * Drain all packets currently held in a particular protocol work queue.
 */
static void
netisr_drain_proto(struct netisr_work *npwp)
{
	struct mbuf *m;

	/*
	 * We would assert the lock on the workstream but it's not passed in.
	 */
	while ((m = npwp->nw_head) != NULL) {
		npwp->nw_head = m->m_nextpkt;
		m->m_nextpkt = NULL;
		if (npwp->nw_head == NULL)
			npwp->nw_tail = NULL;
		npwp->nw_len--;
		m_freem(m);
	}
	KASSERT(npwp->nw_tail == NULL, ("%s: tail", __func__));
	KASSERT(npwp->nw_len == 0, ("%s: len", __func__));
}

/*
 * Remove the registration of a network protocol, which requires clearing
 * per-protocol fields across all workstreams, including freeing all mbufs in
 * the queues at time of unregister.  All work in netisr is briefly suspended
 * while this takes place.
 */
void
netisr_unregister(const struct netisr_handler *nhp)
{
	struct netisr_work *npwp;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	netisr_proto[proto].np_name = NULL;
	netisr_proto[proto].np_handler = NULL;
	netisr_proto[proto].np_m2flow = NULL;
	netisr_proto[proto].np_m2cpuid = NULL;
	netisr_proto[proto].np_qlimit = 0;
	netisr_proto[proto].np_policy = 0;
	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		netisr_drain_proto(npwp);
		bzero(npwp, sizeof(*npwp));
	}
	NETISR_WUNLOCK();
}

/*
 * Compose the global and per-protocol policies on dispatch, and return the
 * dispatch policy to use.
 */
static u_int
netisr_get_dispatch(struct netisr_proto *npp)
{

	/*
	 * Protocol-specific configuration overrides the global default.
	 */
	if (npp->np_dispatch != NETISR_DISPATCH_DEFAULT)
		return (npp->np_dispatch);
	return (netisr_dispatch_policy);
}

/*
 * Look up the workstream given a packet and source identifier.  Do this by
 * checking the protocol's policy, and optionally calling out to the
 * protocol for assistance if required.
 */
static struct mbuf *
netisr_select_cpuid(struct netisr_proto *npp, u_int dispatch_policy,
    uintptr_t source, struct mbuf *m, u_int *cpuidp)
{
	struct ifnet *ifp;
	u_int policy;

	NETISR_LOCK_ASSERT();

	/*
	 * In the event we have only one worker, shortcut and deliver to it
	 * without further ado.
	 */
	if (nws_count == 1) {
		*cpuidp = nws_array[0];
		return (m);
	}

	/*
	 * What happens next depends on the policy selected by the protocol.
	 * If we want to support per-interface policies, we should do that
	 * here first.
	 */
	policy = npp->np_policy;
	if (policy == NETISR_POLICY_CPU) {
		m = npp->np_m2cpuid(m, source, cpuidp);
		if (m == NULL)
			return (NULL);

		/*
		 * It's possible for a protocol not to have a good idea about
		 * where to process a packet, in which case we fall back on
		 * the netisr code to decide.  In the hybrid case, return the
		 * current CPU ID, which will force an immediate direct
		 * dispatch.  In the queued case, fall back on the SOURCE
		 * policy.
		 */
		if (*cpuidp != NETISR_CPUID_NONE)
			return (m);
		if (dispatch_policy == NETISR_DISPATCH_HYBRID) {
			*cpuidp = curcpu;
			return (m);
		}
		policy = NETISR_POLICY_SOURCE;
	}

	if (policy == NETISR_POLICY_FLOW) {
		if (!(m->m_flags & M_FLOWID) && npp->np_m2flow != NULL) {
			m = npp->np_m2flow(m, source);
			if (m == NULL)
				return (NULL);
		}
		if (m->m_flags & M_FLOWID) {
			*cpuidp =
			    netisr_default_flow2cpu(m->m_pkthdr.flowid);
			return (m);
		}
		policy = NETISR_POLICY_SOURCE;
	}

	KASSERT(policy == NETISR_POLICY_SOURCE,
	    ("%s: invalid policy %u for %s", __func__, npp->np_policy,
	    npp->np_name));

	ifp = m->m_pkthdr.rcvif;
	if (ifp != NULL)
		*cpuidp = nws_array[(ifp->if_index + source) % nws_count];
	else
		*cpuidp = nws_array[source % nws_count];
	return (m);
}

/*
 * Process packets associated with a workstream and protocol.  For reasons of
 * fairness, we process up to one complete netisr queue at a time, moving the
 * queue to a stack-local queue for processing, but do not loop refreshing
 * from the global queue.  The caller is responsible for deciding whether to
 * loop, and for setting the NWS_RUNNING flag.  The passed workstream will be
 * locked on entry and relocked before return, but will be released while
 * processing.  The number of packets processed is returned.
 */
static u_int
netisr_process_workstream_proto(struct netisr_workstream *nwsp, u_int proto)
{
	struct netisr_work local_npw, *npwp;
	u_int handled;
	struct mbuf *m;

	NETISR_LOCK_ASSERT();
	NWS_LOCK_ASSERT(nwsp);

	KASSERT(nwsp->nws_flags & NWS_RUNNING,
	    ("%s(%u): not running", __func__, proto));
	KASSERT(proto >= 0 && proto < NETISR_MAXPROT,
	    ("%s(%u): invalid proto\n", __func__, proto));

	npwp = &nwsp->nws_work[proto];
	if (npwp->nw_len == 0)
		return (0);

	/*
	 * Move the global work queue to a thread-local work queue.
	 *
	 * Notice that this means the effective maximum length of the queue
	 * is actually twice that of the maximum queue length specified in
	 * the protocol registration call.
	 */
	handled = npwp->nw_len;
	local_npw = *npwp;
	npwp->nw_head = NULL;
	npwp->nw_tail = NULL;
	npwp->nw_len = 0;
	nwsp->nws_pendingbits &= ~(1 << proto);
	NWS_UNLOCK(nwsp);
	while ((m = local_npw.nw_head) != NULL) {
		local_npw.nw_head = m->m_nextpkt;
		m->m_nextpkt = NULL;
		if (local_npw.nw_head == NULL)
			local_npw.nw_tail = NULL;
		local_npw.nw_len--;
		VNET_ASSERT(m->m_pkthdr.rcvif != NULL,
		    ("%s:%d rcvif == NULL: m=%p", __func__, __LINE__, m));
		CURVNET_SET(m->m_pkthdr.rcvif->if_vnet);
		netisr_proto[proto].np_handler(m);
		CURVNET_RESTORE();
	}
	KASSERT(local_npw.nw_len == 0,
	    ("%s(%u): len %u", __func__, proto, local_npw.nw_len));
	if (netisr_proto[proto].np_drainedcpu)
		netisr_proto[proto].np_drainedcpu(nwsp->nws_cpu);
	NWS_LOCK(nwsp);
	npwp->nw_handled += handled;
	return (handled);
}

/*
 * SWI handler for netisr -- processes packets in a set of workstreams that
 * it owns, woken up by calls to NWS_SIGNAL().  If this workstream is already
 * being direct dispatched, go back to sleep and wait for the dispatching
 * thread to wake us up again.
 */
static void
swi_net(void *arg)
{
#ifdef NETISR_LOCKING
	struct rm_priotracker tracker;
#endif
	struct netisr_workstream *nwsp;
	u_int bits, prot;

	nwsp = arg;

#ifdef DEVICE_POLLING
	KASSERT(nws_count == 1,
	    ("%s: device_polling but nws_count != 1", __func__));
	netisr_poll();
#endif
#ifdef NETISR_LOCKING
	NETISR_RLOCK(&tracker);
#endif
	NWS_LOCK(nwsp);
	KASSERT(!(nwsp->nws_flags & NWS_RUNNING), ("swi_net: running"));
	if (nwsp->nws_flags & NWS_DISPATCHING)
		goto out;
	nwsp->nws_flags |= NWS_RUNNING;
	nwsp->nws_flags &= ~NWS_SCHEDULED;
	while ((bits = nwsp->nws_pendingbits) != 0) {
		while ((prot = ffs(bits)) != 0) {
			prot--;
			bits &= ~(1 << prot);
			(void)netisr_process_workstream_proto(nwsp, prot);
		}
	}
	nwsp->nws_flags &= ~NWS_RUNNING;
out:
	NWS_UNLOCK(nwsp);
#ifdef NETISR_LOCKING
	NETISR_RUNLOCK(&tracker);
#endif
#ifdef DEVICE_POLLING
	netisr_pollmore();
#endif
}
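/*
 * Enqueue a packet on the per-protocol queue of a specific workstream.  If
 * the queue is full, the packet is dropped and ENOBUFS returned; otherwise
 * *dosignalp is set when the SWI should be signalled, which the caller does
 * after releasing the workstream lock.
 */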
static int
netisr_queue_workstream(struct netisr_workstream *nwsp, u_int proto,
    struct netisr_work *npwp, struct mbuf *m, int *dosignalp)
{

	NWS_LOCK_ASSERT(nwsp);

	*dosignalp = 0;
	if (npwp->nw_len < npwp->nw_qlimit) {
		m->m_nextpkt = NULL;
		if (npwp->nw_head == NULL) {
			npwp->nw_head = m;
			npwp->nw_tail = m;
		} else {
			npwp->nw_tail->m_nextpkt = m;
			npwp->nw_tail = m;
		}
		npwp->nw_len++;
		if (npwp->nw_len > npwp->nw_watermark)
			npwp->nw_watermark = npwp->nw_len;

		/*
		 * We must set the bit regardless of NWS_RUNNING, so that
		 * swi_net() keeps calling netisr_process_workstream_proto().
		 */
		nwsp->nws_pendingbits |= (1 << proto);
		if (!(nwsp->nws_flags &
		    (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED))) {
			nwsp->nws_flags |= NWS_SCHEDULED;
			*dosignalp = 1;	/* Defer until unlocked. */
		}
		npwp->nw_queued++;
		return (0);
	} else {
		m_freem(m);
		npwp->nw_qdrops++;
		return (ENOBUFS);
	}
}

static int
netisr_queue_internal(u_int proto, struct mbuf *m, u_int cpuid)
{
	struct netisr_workstream *nwsp;
	struct netisr_work *npwp;
	int dosignal, error;

#ifdef NETISR_LOCKING
	NETISR_LOCK_ASSERT();
#endif
	KASSERT(cpuid <= mp_maxid, ("%s: cpuid too big (%u, %u)", __func__,
	    cpuid, mp_maxid));
	KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));

	dosignal = 0;
	error = 0;
	nwsp = DPCPU_ID_PTR(cpuid, nws);
	npwp = &nwsp->nws_work[proto];
	NWS_LOCK(nwsp);
	error = netisr_queue_workstream(nwsp, proto, npwp, m, &dosignal);
	NWS_UNLOCK(nwsp);
	if (dosignal)
		NWS_SIGNAL(nwsp);
	return (error);
}

int
netisr_queue_src(u_int proto, uintptr_t source, struct mbuf *m)
{
#ifdef NETISR_LOCKING
	struct rm_priotracker tracker;
#endif
	u_int cpuid;
	int error;

	KASSERT(proto < NETISR_MAXPROT,
	    ("%s: invalid proto %u", __func__, proto));

#ifdef NETISR_LOCKING
	NETISR_RLOCK(&tracker);
#endif
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s: invalid proto %u", __func__, proto));

	m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_DEFERRED,
	    source, m, &cpuid);
	if (m != NULL) {
		KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__,
		    cpuid));
		error = netisr_queue_internal(proto, m, cpuid);
	} else
		error = ENOBUFS;
#ifdef NETISR_LOCKING
	NETISR_RUNLOCK(&tracker);
#endif
	return (error);
}

int
netisr_queue(u_int proto, struct mbuf *m)
{

	return (netisr_queue_src(proto, 0, m));
}
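/*
 * From a caller's perspective (illustrative sketch; NETISR_IP is defined in
 * netisr.h, error handling elided), netisr_queue() always defers the packet
 * to the selected worker, while netisr_dispatch() below may run the handler
 * in the calling context depending on the composed dispatch policy:
 *
 *	error = netisr_queue(NETISR_IP, m);	(always deferred)
 *	error = netisr_dispatch(NETISR_IP, m);	(policy-dependent)
 */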
/*
 * Dispatch a packet for netisr processing; direct dispatch is permitted by
 * calling context.
 */
int
netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m)
{
#ifdef NETISR_LOCKING
	struct rm_priotracker tracker;
#endif
	struct netisr_workstream *nwsp;
	struct netisr_proto *npp;
	struct netisr_work *npwp;
	int dosignal, error;
	u_int cpuid, dispatch_policy;

	KASSERT(proto < NETISR_MAXPROT,
	    ("%s: invalid proto %u", __func__, proto));
#ifdef NETISR_LOCKING
	NETISR_RLOCK(&tracker);
#endif
	npp = &netisr_proto[proto];
	KASSERT(npp->np_handler != NULL, ("%s: invalid proto %u", __func__,
	    proto));

	dispatch_policy = netisr_get_dispatch(npp);
	if (dispatch_policy == NETISR_DISPATCH_DEFERRED)
		return (netisr_queue_src(proto, source, m));

	/*
	 * If direct dispatch is forced, then unconditionally dispatch
	 * without a formal CPU selection.  Borrow the current CPU's stats,
	 * even if there's no worker on it.  In this case we don't update
	 * nws_flags because all netisr processing will be source ordered due
	 * to always being forced to directly dispatch.
	 */
	if (dispatch_policy == NETISR_DISPATCH_DIRECT) {
		nwsp = DPCPU_PTR(nws);
		npwp = &nwsp->nws_work[proto];
		npwp->nw_dispatched++;
		npwp->nw_handled++;
		netisr_proto[proto].np_handler(m);
		error = 0;
		goto out_unlock;
	}

	KASSERT(dispatch_policy == NETISR_DISPATCH_HYBRID,
	    ("%s: unknown dispatch policy (%u)", __func__, dispatch_policy));

	/*
	 * Otherwise, we execute in a hybrid mode where we will try to direct
	 * dispatch if we're on the right CPU and the netisr worker isn't
	 * already running.
	 */
	sched_pin();
	m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_HYBRID,
	    source, m, &cpuid);
	if (m == NULL) {
		error = ENOBUFS;
		goto out_unpin;
	}
	KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));
	if (cpuid != curcpu)
		goto queue_fallback;
	nwsp = DPCPU_PTR(nws);
	npwp = &nwsp->nws_work[proto];

	/*-
	 * We are willing to direct dispatch only if three conditions hold:
	 *
	 * (1) The netisr worker isn't already running,
	 * (2) Another thread isn't already directly dispatching, and
	 * (3) The netisr hasn't already been woken up.
	 */
	NWS_LOCK(nwsp);
	if (nwsp->nws_flags & (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED)) {
		error = netisr_queue_workstream(nwsp, proto, npwp, m,
		    &dosignal);
		NWS_UNLOCK(nwsp);
		if (dosignal)
			NWS_SIGNAL(nwsp);
		goto out_unpin;
	}

	/*
	 * The current thread is now effectively the netisr worker, so set
	 * the dispatching flag to prevent concurrent processing of the
	 * stream from another thread (even the netisr worker), which could
	 * otherwise lead to effective misordering of the stream.
	 */
	nwsp->nws_flags |= NWS_DISPATCHING;
	NWS_UNLOCK(nwsp);
	netisr_proto[proto].np_handler(m);
	NWS_LOCK(nwsp);
	nwsp->nws_flags &= ~NWS_DISPATCHING;
	npwp->nw_handled++;
	npwp->nw_hybrid_dispatched++;

	/*
	 * If other work was enqueued by another thread while we were direct
	 * dispatching, we need to signal the netisr worker to do that work.
	 * In the future, we might want to do some of that work in the
	 * current thread, rather than trigger further context switches.  If
	 * so, we'll want to establish a reasonable bound on the work done in
	 * the "borrowed" context.
	 */
	if (nwsp->nws_pendingbits != 0) {
		nwsp->nws_flags |= NWS_SCHEDULED;
		dosignal = 1;
	} else
		dosignal = 0;
	NWS_UNLOCK(nwsp);
	if (dosignal)
		NWS_SIGNAL(nwsp);
	error = 0;
	goto out_unpin;

queue_fallback:
	error = netisr_queue_internal(proto, m, cpuid);
out_unpin:
	sched_unpin();
out_unlock:
#ifdef NETISR_LOCKING
	NETISR_RUNLOCK(&tracker);
#endif
	return (error);
}

int
netisr_dispatch(u_int proto, struct mbuf *m)
{

	return (netisr_dispatch_src(proto, 0, m));
}

#ifdef DEVICE_POLLING
/*
 * Kernel polling borrows a netisr thread to run interface polling in; this
 * function allows kernel polling to request that the netisr thread be
 * scheduled even if no packets are pending for protocols.
 */
void
netisr_sched_poll(void)
{
	struct netisr_workstream *nwsp;

	nwsp = DPCPU_ID_PTR(nws_array[0], nws);
	NWS_SIGNAL(nwsp);
}
#endif

static void
netisr_start_swi(u_int cpuid, struct pcpu *pc)
{
	char swiname[12];
	struct netisr_workstream *nwsp;
	int error;

	KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));

	nwsp = DPCPU_ID_PTR(cpuid, nws);
	mtx_init(&nwsp->nws_mtx, "netisr_mtx", NULL, MTX_DEF);
	nwsp->nws_cpu = cpuid;
	snprintf(swiname, sizeof(swiname), "netisr %u", cpuid);
	error = swi_add(&nwsp->nws_intr_event, swiname, swi_net, nwsp,
	    SWI_NET, INTR_MPSAFE, &nwsp->nws_swi_cookie);
	if (error)
		panic("%s: swi_add %d", __func__, error);
	pc->pc_netisr = nwsp->nws_intr_event;
	if (netisr_bindthreads) {
		error = intr_event_bind(nwsp->nws_intr_event, cpuid);
		if (error != 0)
			printf("%s: cpu %u: intr_event_bind: %d", __func__,
			    cpuid, error);
	}
	NETISR_WLOCK();
	nws_array[nws_count] = nwsp->nws_cpu;
	nws_count++;
	NETISR_WUNLOCK();
}

/*
 * Initialize the netisr subsystem.  We rely on BSS and static initialization
 * of most fields in global data structures.
 *
 * Start a worker thread for the boot CPU so that we can support network
 * traffic immediately in case the network stack is used before additional
 * CPUs are started (for example, diskless boot).
 */
static void
netisr_init(void *arg)
{
	KASSERT(curcpu == 0, ("%s: not on CPU 0", __func__));

	NETISR_LOCK_INIT();
	if (netisr_maxthreads < 1)
		netisr_maxthreads = 1;
	if (netisr_maxthreads > mp_ncpus) {
		printf("netisr_init: forcing maxthreads from %d to %d\n",
		    netisr_maxthreads, mp_ncpus);
		netisr_maxthreads = mp_ncpus;
	}
	if (netisr_defaultqlimit > netisr_maxqlimit) {
		printf("netisr_init: forcing defaultqlimit from %d to %d\n",
		    netisr_defaultqlimit, netisr_maxqlimit);
		netisr_defaultqlimit = netisr_maxqlimit;
	}
#ifdef DEVICE_POLLING
	/*
	 * The device polling code is not yet aware of how to deal with
	 * multiple netisr threads, so for the time being compiling in device
	 * polling disables parallel netisr workers.
	 */
	if (netisr_maxthreads != 1 || netisr_bindthreads != 0) {
		printf("netisr_init: forcing maxthreads to 1 and "
		    "bindthreads to 0 for device polling\n");
		netisr_maxthreads = 1;
		netisr_bindthreads = 0;
	}
#endif
	netisr_start_swi(curcpu, pcpu_find(curcpu));
}
SYSINIT(netisr_init, SI_SUB_SOFTINTR, SI_ORDER_FIRST, netisr_init, NULL);

/*
 * Start worker threads for additional CPUs.  No attempt to gracefully handle
 * work reassignment, we don't yet support dynamic reconfiguration.
 */
static void
netisr_start(void *arg)
{
	struct pcpu *pc;

	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
		if (nws_count >= netisr_maxthreads)
			break;
		/* XXXRW: Is skipping absent CPUs still required here? */
		if (CPU_ABSENT(pc->pc_cpuid))
			continue;
		/* Worker will already be present for boot CPU. */
		if (pc->pc_netisr != NULL)
			continue;
		netisr_start_swi(pc->pc_cpuid, pc);
	}
}
SYSINIT(netisr_start, SI_SUB_SMP, SI_ORDER_MIDDLE, netisr_start, NULL);
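/*
 * The read-only sysctls below (net.isr.proto, net.isr.workstream and
 * net.isr.work) export registration data and per-workstream statistics for
 * monitoring from userland; the netstat(1) -Q option, for example, is built
 * on this interface.
 */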
/*
 * Sysctl monitoring for netisr: query a list of registered protocols.
 */
static int
sysctl_netisr_proto(SYSCTL_HANDLER_ARGS)
{
	struct rm_priotracker tracker;
	struct sysctl_netisr_proto *snpp, *snp_array;
	struct netisr_proto *npp;
	u_int counter, proto;
	int error;

	if (req->newptr != NULL)
		return (EINVAL);
	snp_array = malloc(sizeof(*snp_array) * NETISR_MAXPROT, M_TEMP,
	    M_ZERO | M_WAITOK);
	counter = 0;
	NETISR_RLOCK(&tracker);
	for (proto = 0; proto < NETISR_MAXPROT; proto++) {
		npp = &netisr_proto[proto];
		if (npp->np_name == NULL)
			continue;
		snpp = &snp_array[counter];
		snpp->snp_version = sizeof(*snpp);
		strlcpy(snpp->snp_name, npp->np_name, NETISR_NAMEMAXLEN);
		snpp->snp_proto = proto;
		snpp->snp_qlimit = npp->np_qlimit;
		snpp->snp_policy = npp->np_policy;
		snpp->snp_dispatch = npp->np_dispatch;
		if (npp->np_m2flow != NULL)
			snpp->snp_flags |= NETISR_SNP_FLAGS_M2FLOW;
		if (npp->np_m2cpuid != NULL)
			snpp->snp_flags |= NETISR_SNP_FLAGS_M2CPUID;
		if (npp->np_drainedcpu != NULL)
			snpp->snp_flags |= NETISR_SNP_FLAGS_DRAINEDCPU;
		counter++;
	}
	NETISR_RUNLOCK(&tracker);
	KASSERT(counter <= NETISR_MAXPROT,
	    ("sysctl_netisr_proto: counter too big (%d)", counter));
	error = SYSCTL_OUT(req, snp_array, sizeof(*snp_array) * counter);
	free(snp_array, M_TEMP);
	return (error);
}

SYSCTL_PROC(_net_isr, OID_AUTO, proto,
    CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_proto,
    "S,sysctl_netisr_proto",
    "Return list of protocols registered with netisr");

/*
 * Sysctl monitoring for netisr: query a list of workstreams.
 */
static int
sysctl_netisr_workstream(SYSCTL_HANDLER_ARGS)
{
	struct rm_priotracker tracker;
	struct sysctl_netisr_workstream *snwsp, *snws_array;
	struct netisr_workstream *nwsp;
	u_int counter, cpuid;
	int error;

	if (req->newptr != NULL)
		return (EINVAL);
	snws_array = malloc(sizeof(*snws_array) * MAXCPU, M_TEMP,
	    M_ZERO | M_WAITOK);
	counter = 0;
	NETISR_RLOCK(&tracker);
	CPU_FOREACH(cpuid) {
		nwsp = DPCPU_ID_PTR(cpuid, nws);
		if (nwsp->nws_intr_event == NULL)
			continue;
		NWS_LOCK(nwsp);
		snwsp = &snws_array[counter];
		snwsp->snws_version = sizeof(*snwsp);

		/*
		 * For now, we equate workstream IDs and CPU IDs in the
		 * kernel, but expose them independently to userspace in case
		 * that assumption changes in the future.
		 */
		snwsp->snws_wsid = cpuid;
		snwsp->snws_cpu = cpuid;
		if (nwsp->nws_intr_event != NULL)
			snwsp->snws_flags |= NETISR_SNWS_FLAGS_INTR;
		NWS_UNLOCK(nwsp);
		counter++;
	}
	NETISR_RUNLOCK(&tracker);
	KASSERT(counter <= MAXCPU,
	    ("sysctl_netisr_workstream: counter too big (%d)", counter));
	error = SYSCTL_OUT(req, snws_array, sizeof(*snws_array) * counter);
	free(snws_array, M_TEMP);
	return (error);
}

SYSCTL_PROC(_net_isr, OID_AUTO, workstream,
    CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_workstream,
    "S,sysctl_netisr_workstream",
    "Return list of workstreams implemented by netisr");

/*
 * Sysctl monitoring for netisr: query per-protocol data across all
 * workstreams.
 */
static int
sysctl_netisr_work(SYSCTL_HANDLER_ARGS)
{
	struct rm_priotracker tracker;
	struct sysctl_netisr_work *snwp, *snw_array;
	struct netisr_workstream *nwsp;
	struct netisr_proto *npp;
	struct netisr_work *nwp;
	u_int counter, cpuid, proto;
	int error;

	if (req->newptr != NULL)
		return (EINVAL);
	snw_array = malloc(sizeof(*snw_array) * MAXCPU * NETISR_MAXPROT,
	    M_TEMP, M_ZERO | M_WAITOK);
	counter = 0;
	NETISR_RLOCK(&tracker);
	CPU_FOREACH(cpuid) {
		nwsp = DPCPU_ID_PTR(cpuid, nws);
		if (nwsp->nws_intr_event == NULL)
			continue;
		NWS_LOCK(nwsp);
		for (proto = 0; proto < NETISR_MAXPROT; proto++) {
			npp = &netisr_proto[proto];
			if (npp->np_name == NULL)
				continue;
			nwp = &nwsp->nws_work[proto];
			snwp = &snw_array[counter];
			snwp->snw_version = sizeof(*snwp);
			snwp->snw_wsid = cpuid;		/* See comment above. */
			snwp->snw_proto = proto;
			snwp->snw_len = nwp->nw_len;
			snwp->snw_watermark = nwp->nw_watermark;
			snwp->snw_dispatched = nwp->nw_dispatched;
			snwp->snw_hybrid_dispatched =
			    nwp->nw_hybrid_dispatched;
			snwp->snw_qdrops = nwp->nw_qdrops;
			snwp->snw_queued = nwp->nw_queued;
			snwp->snw_handled = nwp->nw_handled;
			counter++;
		}
		NWS_UNLOCK(nwsp);
	}
	KASSERT(counter <= MAXCPU * NETISR_MAXPROT,
	    ("sysctl_netisr_work: counter too big (%d)", counter));
	NETISR_RUNLOCK(&tracker);
	error = SYSCTL_OUT(req, snw_array, sizeof(*snw_array) * counter);
	free(snw_array, M_TEMP);
	return (error);
}

SYSCTL_PROC(_net_isr, OID_AUTO, work,
    CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_work,
    "S,sysctl_netisr_work",
    "Return list of per-workstream, per-protocol work in netisr");

#ifdef DDB
DB_SHOW_COMMAND(netisr, db_show_netisr)
{
	struct netisr_workstream *nwsp;
	struct netisr_work *nwp;
	int first, proto;
	u_int cpuid;

	db_printf("%3s %6s %5s %5s %5s %8s %8s %8s %8s\n", "CPU", "Proto",
	    "Len", "WMark", "Max", "Disp", "HDisp", "Drop", "Queue");
	CPU_FOREACH(cpuid) {
		nwsp = DPCPU_ID_PTR(cpuid, nws);
		if (nwsp->nws_intr_event == NULL)
			continue;
		first = 1;
		for (proto = 0; proto < NETISR_MAXPROT; proto++) {
			if (netisr_proto[proto].np_handler == NULL)
				continue;
			nwp = &nwsp->nws_work[proto];
			if (first) {
				db_printf("%3d ", cpuid);
				first = 0;
			} else
				db_printf("%3s ", "");
			db_printf(
			    "%6s %5d %5d %5d %8ju %8ju %8ju %8ju\n",
			    netisr_proto[proto].np_name, nwp->nw_len,
			    nwp->nw_watermark, nwp->nw_qlimit,
			    nwp->nw_dispatched, nwp->nw_hybrid_dispatched,
			    nwp->nw_qdrops, nwp->nw_queued);
		}
	}
}
#endif