/*-
 * Copyright (c) 2007-2009 Robert N. M. Watson
 * Copyright (c) 2010-2011 Juniper Networks, Inc.
 * All rights reserved.
 *
 * This software was developed by Robert N. M. Watson under contract
 * to Juniper Networks, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/*
 * netisr is a packet dispatch service, allowing synchronous (directly
 * dispatched) and asynchronous (deferred dispatch) processing of packets by
 * registered protocol handlers.  Callers pass a protocol identifier and
 * packet to netisr, along with a direct dispatch hint, and work will either
 * be immediately processed by the registered handler, or passed to a
 * software interrupt (SWI) thread for deferred dispatch.  Callers will
 * generally select one or the other based on:
 *
 * - Whether directly dispatching a netisr handler leads to code reentrance
 *   or lock recursion, such as entering the socket code from the socket
 *   code.
 * - Whether directly dispatching a netisr handler leads to recursive
 *   processing, such as when decapsulating several wrapped layers of tunnel
 *   information (IPSEC within IPSEC within ...).
 *
 * Maintaining ordering for protocol streams is a critical design concern.
 * Enforcing ordering limits the opportunity for concurrency, but maintains
 * the strong ordering requirements found in some protocols, such as TCP.  Of
 * related concern is CPU affinity--it is desirable to process all data
 * associated with a particular stream on the same CPU over time in order to
 * avoid acquiring locks associated with the connection on different CPUs,
 * keep connection data in one cache, and to generally encourage associated
 * user threads to live on the same CPU as the stream.  It's also desirable
 * to avoid lock migration and contention where locks are associated with
 * more than one flow.
 *
 * netisr supports several policy variations, represented by the
 * NETISR_POLICY_* constants, allowing protocols to play various roles in
 * identifying flows, assigning work to CPUs, etc.  These are described in
 * netisr.h.
 */
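
/*
 * Illustrative example (hypothetical, not part of this file): a protocol
 * that has registered a handler under a protocol number NETISR_FOO could
 * hand an mbuf to netisr either synchronously or asynchronously:
 *
 *	error = netisr_dispatch(NETISR_FOO, m);    (direct if policy allows)
 *	error = netisr_queue(NETISR_FOO, m);       (always deferred to a SWI)
 *
 * Both return 0 on success, or ENOBUFS if the packet had to be dropped.
 */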

#include "opt_ddb.h"
#include "opt_device_polling.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/interrupt.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/rmlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#define	_WANT_NETISR_INTERNAL	/* Enable definitions from netisr_internal.h */
#include <net/if.h>
#include <net/if_var.h>
#include <net/netisr.h>
#include <net/netisr_internal.h>
#include <net/vnet.h>

/*-
 * Synchronize use and modification of the registered netisr data structures;
 * acquire a write lock while modifying the set of registered protocols so
 * that packet-processing paths never see partially registered or
 * unregistered protocols.
 *
 * The following data structures and fields are protected by this lock:
 *
 * - The netisr_proto array, including all fields of struct netisr_proto.
 * - The nws array, including all fields of struct netisr_workstream.
 * - The nws_array array.
 *
 * Note: the NETISR_LOCKING define controls whether read locks are acquired
 * in packet processing paths requiring netisr registration stability.  This
 * is disabled by default as it can lead to measurable performance
 * degradation even with rmlocks (3%-6% for loopback ping-pong traffic), and
 * because netisr registration and unregistration is extremely rare at
 * runtime.  If it becomes more common, this decision should be revisited.
 *
 * XXXRW: rmlocks don't support assertions.
 */
static struct rmlock	netisr_rmlock;
#define	NETISR_LOCK_INIT()	rm_init_flags(&netisr_rmlock, "netisr", \
				    RM_NOWITNESS)
#define	NETISR_LOCK_ASSERT()
#define	NETISR_RLOCK(tracker)	rm_rlock(&netisr_rmlock, (tracker))
#define	NETISR_RUNLOCK(tracker)	rm_runlock(&netisr_rmlock, (tracker))
#define	NETISR_WLOCK()		rm_wlock(&netisr_rmlock)
#define	NETISR_WUNLOCK()	rm_wunlock(&netisr_rmlock)
/* #define	NETISR_LOCKING */

static SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr");

/*-
 * Three global direct dispatch policies are supported:
 *
 * NETISR_DISPATCH_DEFERRED: All work is deferred for a netisr, regardless of
 * context (may be overridden by protocols).
 *
 * NETISR_DISPATCH_HYBRID: If the executing context allows direct dispatch,
 * and we're running on the CPU the work would be performed on, then direct
 * dispatch it if it wouldn't violate ordering constraints on the workstream.
 *
 * NETISR_DISPATCH_DIRECT: If the executing context allows direct dispatch,
 * always direct dispatch.  (The default.)
 *
 * Notice that changing the global policy could lead to short periods of
 * misordered processing, but this is considered acceptable as compared to
 * the complexity of enforcing ordering during policy changes.  Protocols can
 * override the global policy (when they're not doing that, they select
 * NETISR_DISPATCH_DEFAULT).
 */
#define	NETISR_DISPATCH_POLICY_DEFAULT	NETISR_DISPATCH_DIRECT
#define	NETISR_DISPATCH_POLICY_MAXSTR	20 /* Used for temporary buffers. */
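
/*
 * Example administrative usage (illustrative, not code): the global dispatch
 * policy is exposed both as a loader tunable and as a runtime sysctl, e.g.:
 *
 *	net.isr.dispatch="deferred"		(loader.conf)
 *	sysctl net.isr.dispatch=hybrid		(at runtime)
 *
 * "default" is rejected by the handler below, since it is only meaningful as
 * a per-protocol setting that defers to this global policy.
 */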
static u_int netisr_dispatch_policy = NETISR_DISPATCH_POLICY_DEFAULT;
static int sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS);
SYSCTL_PROC(_net_isr, OID_AUTO, dispatch, CTLTYPE_STRING | CTLFLAG_RWTUN,
    0, 0, sysctl_netisr_dispatch_policy, "A",
    "netisr dispatch policy");

/*
 * Allow the administrator to limit the number of threads (CPUs) to use for
 * netisr.  We don't check netisr_maxthreads before creating the thread for
 * CPU 0, so in practice we ignore values <= 1.  This must be set at boot.
 * We will create at most one thread per CPU.
 */
static int netisr_maxthreads = -1;		/* Max number of threads. */
SYSCTL_INT(_net_isr, OID_AUTO, maxthreads, CTLFLAG_RDTUN,
    &netisr_maxthreads, 0,
    "Use at most this many CPUs for netisr processing");

static int netisr_bindthreads = 0;		/* Bind threads to CPUs. */
SYSCTL_INT(_net_isr, OID_AUTO, bindthreads, CTLFLAG_RDTUN,
    &netisr_bindthreads, 0, "Bind netisr threads to CPUs.");

/*
 * Limit per-workstream mbuf queue limits to at most net.isr.maxqlimit, both
 * for initial configuration and later modification using
 * netisr_setqlimit().
 */
#define	NETISR_DEFAULT_MAXQLIMIT	10240
static u_int netisr_maxqlimit = NETISR_DEFAULT_MAXQLIMIT;
SYSCTL_UINT(_net_isr, OID_AUTO, maxqlimit, CTLFLAG_RDTUN,
    &netisr_maxqlimit, 0,
    "Maximum netisr per-protocol, per-CPU queue depth.");

/*
 * The default per-workstream mbuf queue limit for protocols that don't
 * initialize the nh_qlimit field of their struct netisr_handler.  If this is
 * set above netisr_maxqlimit, we truncate it to the maximum during boot.
 */
#define	NETISR_DEFAULT_DEFAULTQLIMIT	256
static u_int netisr_defaultqlimit = NETISR_DEFAULT_DEFAULTQLIMIT;
SYSCTL_UINT(_net_isr, OID_AUTO, defaultqlimit, CTLFLAG_RDTUN,
    &netisr_defaultqlimit, 0,
    "Default netisr per-protocol, per-CPU queue limit if not set by protocol");

/*
 * Store and export the compile-time constant NETISR_MAXPROT limit on the
 * number of protocols that can register with netisr at a time.  This is
 * required for crashdump analysis, as it sizes netisr_proto[].
 */
static u_int netisr_maxprot = NETISR_MAXPROT;
SYSCTL_UINT(_net_isr, OID_AUTO, maxprot, CTLFLAG_RD,
    &netisr_maxprot, 0,
    "Compile-time limit on the number of protocols supported by netisr.");

/*
 * The netisr_proto array describes all registered protocols, indexed by
 * protocol number.  See netisr_internal.h for more details.
 */
static struct netisr_proto	netisr_proto[NETISR_MAXPROT];

/*
 * Per-CPU workstream data.  See netisr_internal.h for more details.
 */
DPCPU_DEFINE(struct netisr_workstream, nws);

/*
 * Map contiguous values between 0 and nws_count into CPU IDs appropriate for
 * accessing workstreams.  This allows constructions of the form
 * DPCPU_ID_GET(nws_array[arbitraryvalue % nws_count], nws).
 */
static u_int	nws_array[MAXCPU];

/*
 * Number of registered workstreams.  Will be at most the number of running
 * CPUs once fully started.
 */
static u_int	nws_count;
SYSCTL_UINT(_net_isr, OID_AUTO, numthreads, CTLFLAG_RD,
    &nws_count, 0, "Number of extant netisr threads.");
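
/*
 * Example boot-time tuning (illustrative values only): the knobs above
 * marked CTLFLAG_RDTUN can only be set from loader.conf(5), e.g.:
 *
 *	net.isr.maxthreads="4"
 *	net.isr.bindthreads="1"
 *	net.isr.defaultqlimit="512"
 */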

/*
 * Synchronization for each workstream: a mutex protects all mutable fields
 * in each stream, including per-protocol state (mbuf queues).  The SWI is
 * woken up if asynchronous dispatch is required.
 */
#define	NWS_LOCK(s)		mtx_lock(&(s)->nws_mtx)
#define	NWS_LOCK_ASSERT(s)	mtx_assert(&(s)->nws_mtx, MA_OWNED)
#define	NWS_UNLOCK(s)		mtx_unlock(&(s)->nws_mtx)
#define	NWS_SIGNAL(s)		swi_sched((s)->nws_swi_cookie, 0)

/*
 * Utility routines for protocols that implement their own mapping of flows
 * to CPUs.
 */
u_int
netisr_get_cpucount(void)
{

	return (nws_count);
}

u_int
netisr_get_cpuid(u_int cpunumber)
{

	KASSERT(cpunumber < nws_count, ("%s: %u >= %u", __func__, cpunumber,
	    nws_count));

	return (nws_array[cpunumber]);
}

/*
 * The default implementation of flow -> CPU ID mapping.
 *
 * Non-static so that protocols can use it to map their own work to specific
 * CPUs in a manner consistent with netisr for affinity purposes.
 */
u_int
netisr_default_flow2cpu(u_int flowid)
{

	return (nws_array[flowid % nws_count]);
}

/*
 * Dispatch tunable and sysctl configuration.
 */
struct netisr_dispatch_table_entry {
	u_int		 ndte_policy;
	const char	*ndte_policy_str;
};
static const struct netisr_dispatch_table_entry netisr_dispatch_table[] = {
	{ NETISR_DISPATCH_DEFAULT, "default" },
	{ NETISR_DISPATCH_DEFERRED, "deferred" },
	{ NETISR_DISPATCH_HYBRID, "hybrid" },
	{ NETISR_DISPATCH_DIRECT, "direct" },
};
static const u_int netisr_dispatch_table_len =
    (sizeof(netisr_dispatch_table) / sizeof(netisr_dispatch_table[0]));

static void
netisr_dispatch_policy_to_str(u_int dispatch_policy, char *buffer,
    u_int buflen)
{
	const struct netisr_dispatch_table_entry *ndtep;
	const char *str;
	u_int i;

	str = "unknown";
	for (i = 0; i < netisr_dispatch_table_len; i++) {
		ndtep = &netisr_dispatch_table[i];
		if (ndtep->ndte_policy == dispatch_policy) {
			str = ndtep->ndte_policy_str;
			break;
		}
	}
	snprintf(buffer, buflen, "%s", str);
}

static int
netisr_dispatch_policy_from_str(const char *str, u_int *dispatch_policyp)
{
	const struct netisr_dispatch_table_entry *ndtep;
	u_int i;

	for (i = 0; i < netisr_dispatch_table_len; i++) {
		ndtep = &netisr_dispatch_table[i];
		if (strcmp(ndtep->ndte_policy_str, str) == 0) {
			*dispatch_policyp = ndtep->ndte_policy;
			return (0);
		}
	}
	return (EINVAL);
}

static int
sysctl_netisr_dispatch_policy(SYSCTL_HANDLER_ARGS)
{
	char tmp[NETISR_DISPATCH_POLICY_MAXSTR];
	u_int dispatch_policy;
	int error;

	netisr_dispatch_policy_to_str(netisr_dispatch_policy, tmp,
	    sizeof(tmp));
	error = sysctl_handle_string(oidp, tmp, sizeof(tmp), req);
	if (error == 0 && req->newptr != NULL) {
		error = netisr_dispatch_policy_from_str(tmp,
		    &dispatch_policy);
		if (error == 0 && dispatch_policy == NETISR_DISPATCH_DEFAULT)
			error = EINVAL;
		if (error == 0)
			netisr_dispatch_policy = dispatch_policy;
	}
	return (error);
}
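
/*
 * Illustrative example (hypothetical): a protocol using NETISR_POLICY_CPU
 * can keep its own flow -> CPU assignment aligned with netisr's notion of
 * affinity by composing the helpers above, e.g. in its nh_m2cpuid method:
 *
 *	*cpuidp = netisr_get_cpuid(flowid % netisr_get_cpucount());
 *
 * which is equivalent to netisr_default_flow2cpu(flowid).
 */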

/*
 * Register a new netisr handler, which requires initializing per-protocol
 * fields for each workstream.  All netisr work is briefly suspended while
 * the protocol is installed.
 */
void
netisr_register(const struct netisr_handler *nhp)
{
	struct netisr_work *npwp;
	const char *name;
	u_int i, proto;

	proto = nhp->nh_proto;
	name = nhp->nh_name;

	/*
	 * Test that the requested registration is valid.
	 */
	KASSERT(nhp->nh_name != NULL,
	    ("%s: nh_name NULL for %u", __func__, proto));
	KASSERT(nhp->nh_handler != NULL,
	    ("%s: nh_handler NULL for %s", __func__, name));
	KASSERT(nhp->nh_policy == NETISR_POLICY_SOURCE ||
	    nhp->nh_policy == NETISR_POLICY_FLOW ||
	    nhp->nh_policy == NETISR_POLICY_CPU,
	    ("%s: unsupported nh_policy %u for %s", __func__,
	    nhp->nh_policy, name));
	KASSERT(nhp->nh_policy == NETISR_POLICY_FLOW ||
	    nhp->nh_m2flow == NULL,
	    ("%s: nh_policy != FLOW but m2flow defined for %s", __func__,
	    name));
	KASSERT(nhp->nh_policy == NETISR_POLICY_CPU || nhp->nh_m2cpuid == NULL,
	    ("%s: nh_policy != CPU but m2cpuid defined for %s", __func__,
	    name));
	KASSERT(nhp->nh_policy != NETISR_POLICY_CPU || nhp->nh_m2cpuid != NULL,
	    ("%s: nh_policy == CPU but m2cpuid not defined for %s", __func__,
	    name));
	KASSERT(nhp->nh_dispatch == NETISR_DISPATCH_DEFAULT ||
	    nhp->nh_dispatch == NETISR_DISPATCH_DEFERRED ||
	    nhp->nh_dispatch == NETISR_DISPATCH_HYBRID ||
	    nhp->nh_dispatch == NETISR_DISPATCH_DIRECT,
	    ("%s: invalid nh_dispatch (%u)", __func__, nhp->nh_dispatch));

	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u, %s): protocol too big", __func__, proto, name));

	/*
	 * Check that the protocol is not already registered.
	 */
	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_name == NULL,
	    ("%s(%u, %s): name present", __func__, proto, name));
	KASSERT(netisr_proto[proto].np_handler == NULL,
	    ("%s(%u, %s): handler present", __func__, proto, name));

	netisr_proto[proto].np_name = name;
	netisr_proto[proto].np_handler = nhp->nh_handler;
	netisr_proto[proto].np_m2flow = nhp->nh_m2flow;
	netisr_proto[proto].np_m2cpuid = nhp->nh_m2cpuid;
	netisr_proto[proto].np_drainedcpu = nhp->nh_drainedcpu;
	if (nhp->nh_qlimit == 0)
		netisr_proto[proto].np_qlimit = netisr_defaultqlimit;
	else if (nhp->nh_qlimit > netisr_maxqlimit) {
		printf("%s: %s requested queue limit %u capped to "
		    "net.isr.maxqlimit %u\n", __func__, name, nhp->nh_qlimit,
		    netisr_maxqlimit);
		netisr_proto[proto].np_qlimit = netisr_maxqlimit;
	} else
		netisr_proto[proto].np_qlimit = nhp->nh_qlimit;
	netisr_proto[proto].np_policy = nhp->nh_policy;
	netisr_proto[proto].np_dispatch = nhp->nh_dispatch;
	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		bzero(npwp, sizeof(*npwp));
		npwp->nw_qlimit = netisr_proto[proto].np_qlimit;
	}
	NETISR_WUNLOCK();
}

/*
 * Clear drop counters across all workstreams for a protocol.
 */
void
netisr_clearqdrops(const struct netisr_handler *nhp)
{
	struct netisr_work *npwp;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		npwp->nw_qdrops = 0;
	}
	NETISR_WUNLOCK();
}
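
/*
 * Illustrative registration example (hypothetical "foo" protocol, not part
 * of this file): a protocol typically registers at initialization time with
 * a statically initialized handler description:
 *
 *	static struct netisr_handler foo_nh = {
 *		.nh_name = "foo",
 *		.nh_handler = foo_input,
 *		.nh_proto = NETISR_FOO,
 *		.nh_policy = NETISR_POLICY_FLOW,
 *	};
 *
 *	netisr_register(&foo_nh);
 *
 * Leaving nh_qlimit as 0 selects net.isr.defaultqlimit, and leaving
 * nh_dispatch unset (NETISR_DISPATCH_DEFAULT) defers to the global policy.
 */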

/*
 * Query current drop counters across all workstreams for a protocol.
 */
void
netisr_getqdrops(const struct netisr_handler *nhp, u_int64_t *qdropp)
{
	struct netisr_work *npwp;
	struct rm_priotracker tracker;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	*qdropp = 0;
	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_RLOCK(&tracker);
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		*qdropp += npwp->nw_qdrops;
	}
	NETISR_RUNLOCK(&tracker);
}

/*
 * Query current per-workstream queue limit for a protocol.
 */
void
netisr_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp)
{
	struct rm_priotracker tracker;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int proto;

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_RLOCK(&tracker);
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));
	*qlimitp = netisr_proto[proto].np_qlimit;
	NETISR_RUNLOCK(&tracker);
}

/*
 * Update the queue limit across per-workstream queues for a protocol.  We
 * simply change the limits, and don't drain overflowed packets as they will
 * (hopefully) take care of themselves shortly.
 */
int
netisr_setqlimit(const struct netisr_handler *nhp, u_int qlimit)
{
	struct netisr_work *npwp;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	if (qlimit > netisr_maxqlimit)
		return (EINVAL);

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	netisr_proto[proto].np_qlimit = qlimit;
	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		npwp->nw_qlimit = qlimit;
	}
	NETISR_WUNLOCK();
	return (0);
}

/*
 * Drain all packets currently held in a particular protocol work queue.
 */
static void
netisr_drain_proto(struct netisr_work *npwp)
{
	struct mbuf *m;

	/*
	 * We would assert the lock on the workstream but it's not passed in.
	 */
	while ((m = npwp->nw_head) != NULL) {
		npwp->nw_head = m->m_nextpkt;
		m->m_nextpkt = NULL;
		if (npwp->nw_head == NULL)
			npwp->nw_tail = NULL;
		npwp->nw_len--;
		m_freem(m);
	}
	KASSERT(npwp->nw_tail == NULL, ("%s: tail", __func__));
	KASSERT(npwp->nw_len == 0, ("%s: len", __func__));
}
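
/*
 * Illustrative example (hypothetical, reusing the foo_nh handler sketched
 * earlier): a protocol that finds its queues overflowing can raise its
 * per-CPU depth at runtime, bounded by net.isr.maxqlimit:
 *
 *	error = netisr_setqlimit(&foo_nh, 2048);
 *
 * where EINVAL indicates the requested limit exceeded net.isr.maxqlimit.
 */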

/*
 * Remove the registration of a network protocol, which requires clearing
 * per-protocol fields across all workstreams, including freeing all mbufs
 * in the queues at the time of unregistration.  All work in netisr is
 * briefly suspended while this takes place.
 */
void
netisr_unregister(const struct netisr_handler *nhp)
{
	struct netisr_work *npwp;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_WLOCK();
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	netisr_proto[proto].np_name = NULL;
	netisr_proto[proto].np_handler = NULL;
	netisr_proto[proto].np_m2flow = NULL;
	netisr_proto[proto].np_m2cpuid = NULL;
	netisr_proto[proto].np_qlimit = 0;
	netisr_proto[proto].np_policy = 0;
	CPU_FOREACH(i) {
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		netisr_drain_proto(npwp);
		bzero(npwp, sizeof(*npwp));
	}
	NETISR_WUNLOCK();
}

/*
 * Compose the global and per-protocol policies on dispatch, and return the
 * dispatch policy to use.
 */
static u_int
netisr_get_dispatch(struct netisr_proto *npp)
{

	/*
	 * Protocol-specific configuration overrides the global default.
	 */
	if (npp->np_dispatch != NETISR_DISPATCH_DEFAULT)
		return (npp->np_dispatch);
	return (netisr_dispatch_policy);
}

/*
 * Look up the workstream given a packet and source identifier.  Do this by
 * checking the protocol's policy, and optionally call out to the protocol
 * for assistance if required.
 */
static struct mbuf *
netisr_select_cpuid(struct netisr_proto *npp, u_int dispatch_policy,
    uintptr_t source, struct mbuf *m, u_int *cpuidp)
{
	struct ifnet *ifp;
	u_int policy;

	NETISR_LOCK_ASSERT();

	/*
	 * In the event we have only one worker, shortcut and deliver to it
	 * without further ado.
	 */
	if (nws_count == 1) {
		*cpuidp = nws_array[0];
		return (m);
	}

	/*
	 * What happens next depends on the policy selected by the protocol.
	 * If we want to support per-interface policies, we should do that
	 * here first.
	 */
	policy = npp->np_policy;
	if (policy == NETISR_POLICY_CPU) {
		m = npp->np_m2cpuid(m, source, cpuidp);
		if (m == NULL)
			return (NULL);

		/*
		 * It's possible for a protocol not to have a good idea about
		 * where to process a packet, in which case we fall back on
		 * the netisr code to decide.  In the hybrid case, return the
		 * current CPU ID, which will force an immediate direct
		 * dispatch.  In the queued case, fall back on the SOURCE
		 * policy.
		 */
		if (*cpuidp != NETISR_CPUID_NONE)
			return (m);
		if (dispatch_policy == NETISR_DISPATCH_HYBRID) {
			*cpuidp = curcpu;
			return (m);
		}
		policy = NETISR_POLICY_SOURCE;
	}

	if (policy == NETISR_POLICY_FLOW) {
		if (M_HASHTYPE_GET(m) == M_HASHTYPE_NONE &&
		    npp->np_m2flow != NULL) {
			m = npp->np_m2flow(m, source);
			if (m == NULL)
				return (NULL);
		}
		if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
			*cpuidp =
			    netisr_default_flow2cpu(m->m_pkthdr.flowid);
			return (m);
		}
		policy = NETISR_POLICY_SOURCE;
	}

	KASSERT(policy == NETISR_POLICY_SOURCE,
	    ("%s: invalid policy %u for %s", __func__, npp->np_policy,
	    npp->np_name));

	ifp = m->m_pkthdr.rcvif;
	if (ifp != NULL)
		*cpuidp = nws_array[(ifp->if_index + source) % nws_count];
	else
		*cpuidp = nws_array[source % nws_count];
	return (m);
}

/*
 * Process packets associated with a workstream and protocol.  For reasons of
 * fairness, we process up to one complete netisr queue at a time, moving the
 * queue to a stack-local queue for processing, but do not loop refreshing
 * from the global queue.  The caller is responsible for deciding whether to
 * loop, and for setting the NWS_RUNNING flag.  The passed workstream will be
 * locked on entry and relocked before return, but will be released while
 * processing.  The number of packets processed is returned.
 */
static u_int
netisr_process_workstream_proto(struct netisr_workstream *nwsp, u_int proto)
{
	struct netisr_work local_npw, *npwp;
	u_int handled;
	struct mbuf *m;

	NETISR_LOCK_ASSERT();
	NWS_LOCK_ASSERT(nwsp);

	KASSERT(nwsp->nws_flags & NWS_RUNNING,
	    ("%s(%u): not running", __func__, proto));
	KASSERT(proto >= 0 && proto < NETISR_MAXPROT,
	    ("%s(%u): invalid proto\n", __func__, proto));

	npwp = &nwsp->nws_work[proto];
	if (npwp->nw_len == 0)
		return (0);

	/*
	 * Move the global work queue to a thread-local work queue.
	 *
	 * Notice that this means the effective maximum length of the queue
	 * is actually twice that of the maximum queue length specified in
	 * the protocol registration call.
	 */
	handled = npwp->nw_len;
	local_npw = *npwp;
	npwp->nw_head = NULL;
	npwp->nw_tail = NULL;
	npwp->nw_len = 0;
	nwsp->nws_pendingbits &= ~(1 << proto);
	NWS_UNLOCK(nwsp);
	while ((m = local_npw.nw_head) != NULL) {
		local_npw.nw_head = m->m_nextpkt;
		m->m_nextpkt = NULL;
		if (local_npw.nw_head == NULL)
			local_npw.nw_tail = NULL;
		local_npw.nw_len--;
		VNET_ASSERT(m->m_pkthdr.rcvif != NULL,
		    ("%s:%d rcvif == NULL: m=%p", __func__, __LINE__, m));
		CURVNET_SET(m->m_pkthdr.rcvif->if_vnet);
		netisr_proto[proto].np_handler(m);
		CURVNET_RESTORE();
	}
	KASSERT(local_npw.nw_len == 0,
	    ("%s(%u): len %u", __func__, proto, local_npw.nw_len));
	if (netisr_proto[proto].np_drainedcpu)
		netisr_proto[proto].np_drainedcpu(nwsp->nws_cpu);
	NWS_LOCK(nwsp);
	npwp->nw_handled += handled;
	return (handled);
}

/*
 * SWI handler for netisr -- processes packets in a set of workstreams that
 * it owns, woken up by calls to NWS_SIGNAL().  If this workstream is already
 * being direct dispatched, go back to sleep and wait for the dispatching
 * thread to wake us up again.
 */
static void
swi_net(void *arg)
{
#ifdef NETISR_LOCKING
	struct rm_priotracker tracker;
#endif
	struct netisr_workstream *nwsp;
	u_int bits, prot;

	nwsp = arg;

#ifdef DEVICE_POLLING
	KASSERT(nws_count == 1,
	    ("%s: device_polling but nws_count != 1", __func__));
	netisr_poll();
#endif
#ifdef NETISR_LOCKING
	NETISR_RLOCK(&tracker);
#endif
	NWS_LOCK(nwsp);
	KASSERT(!(nwsp->nws_flags & NWS_RUNNING), ("swi_net: running"));
	if (nwsp->nws_flags & NWS_DISPATCHING)
		goto out;
	nwsp->nws_flags |= NWS_RUNNING;
	nwsp->nws_flags &= ~NWS_SCHEDULED;
	while ((bits = nwsp->nws_pendingbits) != 0) {
		while ((prot = ffs(bits)) != 0) {
			prot--;
			bits &= ~(1 << prot);
			(void)netisr_process_workstream_proto(nwsp, prot);
		}
	}
	nwsp->nws_flags &= ~NWS_RUNNING;
out:
	NWS_UNLOCK(nwsp);
#ifdef NETISR_LOCKING
	NETISR_RUNLOCK(&tracker);
#endif
#ifdef DEVICE_POLLING
	netisr_pollmore();
#endif
}

static int
netisr_queue_workstream(struct netisr_workstream *nwsp, u_int proto,
    struct netisr_work *npwp, struct mbuf *m, int *dosignalp)
{

	NWS_LOCK_ASSERT(nwsp);

	*dosignalp = 0;
	if (npwp->nw_len < npwp->nw_qlimit) {
		m->m_nextpkt = NULL;
		if (npwp->nw_head == NULL) {
			npwp->nw_head = m;
			npwp->nw_tail = m;
		} else {
			npwp->nw_tail->m_nextpkt = m;
			npwp->nw_tail = m;
		}
		npwp->nw_len++;
		if (npwp->nw_len > npwp->nw_watermark)
			npwp->nw_watermark = npwp->nw_len;

		/*
		 * We must set the bit regardless of NWS_RUNNING, so that
		 * swi_net() keeps calling netisr_process_workstream_proto().
		 */
		nwsp->nws_pendingbits |= (1 << proto);
		if (!(nwsp->nws_flags &
		    (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED))) {
			nwsp->nws_flags |= NWS_SCHEDULED;
			*dosignalp = 1;	/* Defer until unlocked. */
		}
		npwp->nw_queued++;
		return (0);
	} else {
		m_freem(m);
		npwp->nw_qdrops++;
		return (ENOBUFS);
	}
}

static int
netisr_queue_internal(u_int proto, struct mbuf *m, u_int cpuid)
{
	struct netisr_workstream *nwsp;
	struct netisr_work *npwp;
	int dosignal, error;

#ifdef NETISR_LOCKING
	NETISR_LOCK_ASSERT();
#endif
	KASSERT(cpuid <= mp_maxid, ("%s: cpuid too big (%u, %u)", __func__,
	    cpuid, mp_maxid));
	KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));

	dosignal = 0;
	error = 0;
	nwsp = DPCPU_ID_PTR(cpuid, nws);
	npwp = &nwsp->nws_work[proto];
	NWS_LOCK(nwsp);
	error = netisr_queue_workstream(nwsp, proto, npwp, m, &dosignal);
	NWS_UNLOCK(nwsp);
	if (dosignal)
		NWS_SIGNAL(nwsp);
	return (error);
}

int
netisr_queue_src(u_int proto, uintptr_t source, struct mbuf *m)
{
#ifdef NETISR_LOCKING
	struct rm_priotracker tracker;
#endif
	u_int cpuid;
	int error;

	KASSERT(proto < NETISR_MAXPROT,
	    ("%s: invalid proto %u", __func__, proto));

#ifdef NETISR_LOCKING
	NETISR_RLOCK(&tracker);
#endif
	KASSERT(netisr_proto[proto].np_handler != NULL,
	    ("%s: invalid proto %u", __func__, proto));

	m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_DEFERRED,
	    source, m, &cpuid);
	if (m != NULL) {
		KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__,
		    cpuid));
		error = netisr_queue_internal(proto, m, cpuid);
	} else
		error = ENOBUFS;
#ifdef NETISR_LOCKING
	NETISR_RUNLOCK(&tracker);
#endif
	return (error);
}

int
netisr_queue(u_int proto, struct mbuf *m)
{

	return (netisr_queue_src(proto, 0, m));
}

/*
 * Dispatch a packet for netisr processing; direct dispatch is permitted by
 * calling context.
 */
int
netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m)
{
#ifdef NETISR_LOCKING
	struct rm_priotracker tracker;
#endif
	struct netisr_workstream *nwsp;
	struct netisr_proto *npp;
	struct netisr_work *npwp;
	int dosignal, error;
	u_int cpuid, dispatch_policy;

	KASSERT(proto < NETISR_MAXPROT,
	    ("%s: invalid proto %u", __func__, proto));
#ifdef NETISR_LOCKING
	NETISR_RLOCK(&tracker);
#endif
	npp = &netisr_proto[proto];
	KASSERT(npp->np_handler != NULL, ("%s: invalid proto %u", __func__,
	    proto));

	dispatch_policy = netisr_get_dispatch(npp);
	if (dispatch_policy == NETISR_DISPATCH_DEFERRED)
		return (netisr_queue_src(proto, source, m));

	/*
	 * If direct dispatch is forced, then unconditionally dispatch
	 * without a formal CPU selection.  Borrow the current CPU's stats,
	 * even if there's no worker on it.  In this case we don't update
	 * nws_flags because all netisr processing will be source ordered due
	 * to always being forced to directly dispatch.
	 */
	if (dispatch_policy == NETISR_DISPATCH_DIRECT) {
		nwsp = DPCPU_PTR(nws);
		npwp = &nwsp->nws_work[proto];
		npwp->nw_dispatched++;
		npwp->nw_handled++;
		netisr_proto[proto].np_handler(m);
		error = 0;
		goto out_unlock;
	}

	KASSERT(dispatch_policy == NETISR_DISPATCH_HYBRID,
	    ("%s: unknown dispatch policy (%u)", __func__, dispatch_policy));

	/*
	 * Otherwise, we execute in a hybrid mode where we will try to direct
	 * dispatch if we're on the right CPU and the netisr worker isn't
	 * already running.
	 */
	sched_pin();
	m = netisr_select_cpuid(&netisr_proto[proto], NETISR_DISPATCH_HYBRID,
	    source, m, &cpuid);
	if (m == NULL) {
		error = ENOBUFS;
		goto out_unpin;
	}
	KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));
	if (cpuid != curcpu)
		goto queue_fallback;
	nwsp = DPCPU_PTR(nws);
	npwp = &nwsp->nws_work[proto];

	/*-
	 * We are willing to direct dispatch only if three conditions hold:
	 *
	 * (1) The netisr worker isn't already running,
	 * (2) Another thread isn't already directly dispatching, and
	 * (3) The netisr hasn't already been woken up.
	 */
	NWS_LOCK(nwsp);
	if (nwsp->nws_flags & (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED)) {
		error = netisr_queue_workstream(nwsp, proto, npwp, m,
		    &dosignal);
		NWS_UNLOCK(nwsp);
		if (dosignal)
			NWS_SIGNAL(nwsp);
		goto out_unpin;
	}

	/*
	 * The current thread is now effectively the netisr worker, so set
	 * the dispatching flag to prevent concurrent processing of the
	 * stream from another thread (even the netisr worker), which could
	 * otherwise lead to effective misordering of the stream.
	 */
	nwsp->nws_flags |= NWS_DISPATCHING;
	NWS_UNLOCK(nwsp);
	netisr_proto[proto].np_handler(m);
	NWS_LOCK(nwsp);
	nwsp->nws_flags &= ~NWS_DISPATCHING;
	npwp->nw_handled++;
	npwp->nw_hybrid_dispatched++;

	/*
	 * If other work was enqueued by another thread while we were direct
	 * dispatching, we need to signal the netisr worker to do that work.
	 * In the future, we might want to do some of that work in the
	 * current thread, rather than trigger further context switches.  If
	 * so, we'll want to establish a reasonable bound on the work done in
	 * the "borrowed" context.
	 */
	if (nwsp->nws_pendingbits != 0) {
		nwsp->nws_flags |= NWS_SCHEDULED;
		dosignal = 1;
	} else
		dosignal = 0;
	NWS_UNLOCK(nwsp);
	if (dosignal)
		NWS_SIGNAL(nwsp);
	error = 0;
	goto out_unpin;

queue_fallback:
	error = netisr_queue_internal(proto, m, cpuid);
out_unpin:
	sched_unpin();
out_unlock:
#ifdef NETISR_LOCKING
	NETISR_RUNLOCK(&tracker);
#endif
	return (error);
}

int
netisr_dispatch(u_int proto, struct mbuf *m)
{

	return (netisr_dispatch_src(proto, 0, m));
}

#ifdef DEVICE_POLLING
/*
 * Kernel polling borrows a netisr thread to run interface polling in; this
 * function allows kernel polling to request that the netisr thread be
 * scheduled even if no packets are pending for protocols.
 */
void
netisr_sched_poll(void)
{
	struct netisr_workstream *nwsp;

	nwsp = DPCPU_ID_PTR(nws_array[0], nws);
	NWS_SIGNAL(nwsp);
}
#endif

static void
netisr_start_swi(u_int cpuid, struct pcpu *pc)
{
	char swiname[12];
	struct netisr_workstream *nwsp;
	int error;

	KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));

	nwsp = DPCPU_ID_PTR(cpuid, nws);
	mtx_init(&nwsp->nws_mtx, "netisr_mtx", NULL, MTX_DEF);
	nwsp->nws_cpu = cpuid;
	snprintf(swiname, sizeof(swiname), "netisr %u", cpuid);
	error = swi_add(&nwsp->nws_intr_event, swiname, swi_net, nwsp,
	    SWI_NET, INTR_MPSAFE, &nwsp->nws_swi_cookie);
	if (error)
		panic("%s: swi_add %d", __func__, error);
	pc->pc_netisr = nwsp->nws_intr_event;
	if (netisr_bindthreads) {
		error = intr_event_bind(nwsp->nws_intr_event, cpuid);
		if (error != 0)
			printf("%s: cpu %u: intr_event_bind: %d\n", __func__,
			    cpuid, error);
	}
	NETISR_WLOCK();
	nws_array[nws_count] = nwsp->nws_cpu;
	nws_count++;
	NETISR_WUNLOCK();
}

/*
 * Initialize the netisr subsystem.  We rely on BSS and static initialization
 * of most fields in global data structures.
 *
 * Start a worker thread for the boot CPU so that we can support network
 * traffic immediately in case the network stack is used before additional
 * CPUs are started (for example, diskless boot).
 */
static void
netisr_init(void *arg)
{
	KASSERT(curcpu == 0, ("%s: not on CPU 0", __func__));

	NETISR_LOCK_INIT();
	if (netisr_maxthreads < 1)
		netisr_maxthreads = 1;
	if (netisr_maxthreads > mp_ncpus) {
		printf("netisr_init: forcing maxthreads from %d to %d\n",
		    netisr_maxthreads, mp_ncpus);
		netisr_maxthreads = mp_ncpus;
	}
	if (netisr_defaultqlimit > netisr_maxqlimit) {
		printf("netisr_init: forcing defaultqlimit from %d to %d\n",
		    netisr_defaultqlimit, netisr_maxqlimit);
		netisr_defaultqlimit = netisr_maxqlimit;
	}
#ifdef DEVICE_POLLING
	/*
	 * The device polling code is not yet aware of how to deal with
	 * multiple netisr threads, so for the time being compiling in device
	 * polling disables parallel netisr workers.
	 */
	if (netisr_maxthreads != 1 || netisr_bindthreads != 0) {
		printf("netisr_init: forcing maxthreads to 1 and "
		    "bindthreads to 0 for device polling\n");
		netisr_maxthreads = 1;
		netisr_bindthreads = 0;
	}
#endif
	netisr_start_swi(curcpu, pcpu_find(curcpu));
}
SYSINIT(netisr_init, SI_SUB_SOFTINTR, SI_ORDER_FIRST, netisr_init, NULL);

/*
 * Start worker threads for additional CPUs.  No attempt to gracefully handle
 * work reassignment, we don't yet support dynamic reconfiguration.
 */
static void
netisr_start(void *arg)
{
	struct pcpu *pc;

	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
		if (nws_count >= netisr_maxthreads)
			break;
		/* XXXRW: Is skipping absent CPUs still required here? */
		if (CPU_ABSENT(pc->pc_cpuid))
			continue;
		/* Worker will already be present for boot CPU. */
		if (pc->pc_netisr != NULL)
			continue;
		netisr_start_swi(pc->pc_cpuid, pc);
	}
}
SYSINIT(netisr_start, SI_SUB_SMP, SI_ORDER_MIDDLE, netisr_start, NULL);

/*
 * Sysctl monitoring for netisr: query a list of registered protocols.
 */
static int
sysctl_netisr_proto(SYSCTL_HANDLER_ARGS)
{
	struct rm_priotracker tracker;
	struct sysctl_netisr_proto *snpp, *snp_array;
	struct netisr_proto *npp;
	u_int counter, proto;
	int error;

	if (req->newptr != NULL)
		return (EINVAL);
	snp_array = malloc(sizeof(*snp_array) * NETISR_MAXPROT, M_TEMP,
	    M_ZERO | M_WAITOK);
	counter = 0;
	NETISR_RLOCK(&tracker);
	for (proto = 0; proto < NETISR_MAXPROT; proto++) {
		npp = &netisr_proto[proto];
		if (npp->np_name == NULL)
			continue;
		snpp = &snp_array[counter];
		snpp->snp_version = sizeof(*snpp);
		strlcpy(snpp->snp_name, npp->np_name, NETISR_NAMEMAXLEN);
		snpp->snp_proto = proto;
		snpp->snp_qlimit = npp->np_qlimit;
		snpp->snp_policy = npp->np_policy;
		snpp->snp_dispatch = npp->np_dispatch;
		if (npp->np_m2flow != NULL)
			snpp->snp_flags |= NETISR_SNP_FLAGS_M2FLOW;
		if (npp->np_m2cpuid != NULL)
			snpp->snp_flags |= NETISR_SNP_FLAGS_M2CPUID;
		if (npp->np_drainedcpu != NULL)
			snpp->snp_flags |= NETISR_SNP_FLAGS_DRAINEDCPU;
		counter++;
	}
	NETISR_RUNLOCK(&tracker);
	KASSERT(counter <= NETISR_MAXPROT,
	    ("sysctl_netisr_proto: counter too big (%d)", counter));
	error = SYSCTL_OUT(req, snp_array, sizeof(*snp_array) * counter);
	free(snp_array, M_TEMP);
	return (error);
}

SYSCTL_PROC(_net_isr, OID_AUTO, proto,
    CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_proto,
    "S,sysctl_netisr_proto",
    "Return list of protocols registered with netisr");

/*
 * Sysctl monitoring for netisr: query a list of workstreams.
 */
static int
sysctl_netisr_workstream(SYSCTL_HANDLER_ARGS)
{
	struct rm_priotracker tracker;
	struct sysctl_netisr_workstream *snwsp, *snws_array;
	struct netisr_workstream *nwsp;
	u_int counter, cpuid;
	int error;

	if (req->newptr != NULL)
		return (EINVAL);
	snws_array = malloc(sizeof(*snws_array) * MAXCPU, M_TEMP,
	    M_ZERO | M_WAITOK);
	counter = 0;
	NETISR_RLOCK(&tracker);
	CPU_FOREACH(cpuid) {
		nwsp = DPCPU_ID_PTR(cpuid, nws);
		if (nwsp->nws_intr_event == NULL)
			continue;
		NWS_LOCK(nwsp);
		snwsp = &snws_array[counter];
		snwsp->snws_version = sizeof(*snwsp);

		/*
		 * For now, we equate workstream IDs and CPU IDs in the
		 * kernel, but expose them independently to userspace in case
		 * that assumption changes in the future.
		 */
		snwsp->snws_wsid = cpuid;
		snwsp->snws_cpu = cpuid;
		if (nwsp->nws_intr_event != NULL)
			snwsp->snws_flags |= NETISR_SNWS_FLAGS_INTR;
		NWS_UNLOCK(nwsp);
		counter++;
	}
	NETISR_RUNLOCK(&tracker);
	KASSERT(counter <= MAXCPU,
	    ("sysctl_netisr_workstream: counter too big (%d)", counter));
	error = SYSCTL_OUT(req, snws_array, sizeof(*snws_array) * counter);
	free(snws_array, M_TEMP);
	return (error);
}

SYSCTL_PROC(_net_isr, OID_AUTO, workstream,
    CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_workstream,
    "S,sysctl_netisr_workstream",
    "Return list of workstreams implemented by netisr");
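
/*
 * Userspace consumption (illustrative): the binary structures exported by
 * the net.isr.proto, net.isr.workstream, and net.isr.work nodes are intended
 * for monitoring tools; on FreeBSD, "netstat -Q" is the usual consumer and
 * pretty-prints this configuration and the per-workstream statistics.
 */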

/*
 * Sysctl monitoring for netisr: query per-protocol data across all
 * workstreams.
 */
static int
sysctl_netisr_work(SYSCTL_HANDLER_ARGS)
{
	struct rm_priotracker tracker;
	struct sysctl_netisr_work *snwp, *snw_array;
	struct netisr_workstream *nwsp;
	struct netisr_proto *npp;
	struct netisr_work *nwp;
	u_int counter, cpuid, proto;
	int error;

	if (req->newptr != NULL)
		return (EINVAL);
	snw_array = malloc(sizeof(*snw_array) * MAXCPU * NETISR_MAXPROT,
	    M_TEMP, M_ZERO | M_WAITOK);
	counter = 0;
	NETISR_RLOCK(&tracker);
	CPU_FOREACH(cpuid) {
		nwsp = DPCPU_ID_PTR(cpuid, nws);
		if (nwsp->nws_intr_event == NULL)
			continue;
		NWS_LOCK(nwsp);
		for (proto = 0; proto < NETISR_MAXPROT; proto++) {
			npp = &netisr_proto[proto];
			if (npp->np_name == NULL)
				continue;
			nwp = &nwsp->nws_work[proto];
			snwp = &snw_array[counter];
			snwp->snw_version = sizeof(*snwp);
			snwp->snw_wsid = cpuid;		/* See comment above. */
			snwp->snw_proto = proto;
			snwp->snw_len = nwp->nw_len;
			snwp->snw_watermark = nwp->nw_watermark;
			snwp->snw_dispatched = nwp->nw_dispatched;
			snwp->snw_hybrid_dispatched =
			    nwp->nw_hybrid_dispatched;
			snwp->snw_qdrops = nwp->nw_qdrops;
			snwp->snw_queued = nwp->nw_queued;
			snwp->snw_handled = nwp->nw_handled;
			counter++;
		}
		NWS_UNLOCK(nwsp);
	}
	KASSERT(counter <= MAXCPU * NETISR_MAXPROT,
	    ("sysctl_netisr_work: counter too big (%d)", counter));
	NETISR_RUNLOCK(&tracker);
	error = SYSCTL_OUT(req, snw_array, sizeof(*snw_array) * counter);
	free(snw_array, M_TEMP);
	return (error);
}

SYSCTL_PROC(_net_isr, OID_AUTO, work,
    CTLFLAG_RD|CTLTYPE_STRUCT|CTLFLAG_MPSAFE, 0, 0, sysctl_netisr_work,
    "S,sysctl_netisr_work",
    "Return list of per-workstream, per-protocol work in netisr");

#ifdef DDB
DB_SHOW_COMMAND(netisr, db_show_netisr)
{
	struct netisr_workstream *nwsp;
	struct netisr_work *nwp;
	int first, proto;
	u_int cpuid;

	db_printf("%3s %6s %5s %5s %5s %8s %8s %8s %8s\n", "CPU", "Proto",
	    "Len", "WMark", "Max", "Disp", "HDisp", "Drop", "Queue");
	CPU_FOREACH(cpuid) {
		nwsp = DPCPU_ID_PTR(cpuid, nws);
		if (nwsp->nws_intr_event == NULL)
			continue;
		first = 1;
		for (proto = 0; proto < NETISR_MAXPROT; proto++) {
			if (netisr_proto[proto].np_handler == NULL)
				continue;
			nwp = &nwsp->nws_work[proto];
			if (first) {
				db_printf("%3d ", cpuid);
				first = 0;
			} else
				db_printf("%3s ", "");
			db_printf(
			    "%6s %5d %5d %5d %8ju %8ju %8ju %8ju\n",
			    netisr_proto[proto].np_name, nwp->nw_len,
			    nwp->nw_watermark, nwp->nw_qlimit,
			    nwp->nw_dispatched, nwp->nw_hybrid_dispatched,
			    nwp->nw_qdrops, nwp->nw_queued);
		}
	}
}
#endif
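
/*
 * Usage note (illustrative): with DDB compiled in, the command registered
 * above is invoked from the debugger prompt as "show netisr" and prints one
 * row per registered protocol for each CPU with an active workstream.
 */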