/*-
 * Copyright (c) 2007-2009 Robert N. M. Watson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/*
 * netisr is a packet dispatch service, allowing synchronous (directly
 * dispatched) and asynchronous (deferred dispatch) processing of packets by
 * registered protocol handlers.  Callers pass a protocol identifier and
 * packet to netisr, along with a direct dispatch hint, and work will either
 * be immediately processed by the registered handler, or passed to a
 * kernel software interrupt (SWI) thread for deferred dispatch.  Callers
 * will generally select one or the other based on:
 *
 * - Whether directly dispatching a netisr handler might lead to code
 *   reentrance or lock recursion, such as entering the socket code from the
 *   socket code.
 * - Whether directly dispatching a netisr handler might lead to recursive
 *   processing, such as when decapsulating several wrapped layers of tunnel
 *   information (IPSEC within IPSEC within ...).
 *
 * Maintaining ordering for protocol streams is a critical design concern.
 * Enforcing ordering limits the opportunity for concurrency, but maintains
 * the strong ordering requirements found in some protocols, such as TCP.  Of
 * related concern is CPU affinity--it is desirable to process all data
 * associated with a particular stream on the same CPU over time in order to
 * avoid acquiring locks associated with the connection on different CPUs,
 * keep connection data in one cache, and to generally encourage associated
 * user threads to live on the same CPU as the stream.  It's also desirable
 * to avoid lock migration and contention where locks are associated with
 * more than one flow.
 *
 * netisr supports several policy variations, represented by the
 * NETISR_POLICY_* constants, allowing protocols to play a varying role in
 * identifying flows, assigning work to CPUs, etc.  These are described in
 * detail in netisr.h.
 */
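
/*
 * Example (illustrative): a protocol input path hands a packet to netisr
 * with one of two calls.  netisr_dispatch() permits direct dispatch in the
 * calling context, subject to the global policy configured below, while
 * netisr_queue() always defers to a SWI thread:
 *
 *	error = netisr_dispatch(NETISR_IP, m);	(handler may run inline)
 *	error = netisr_queue(NETISR_IP, m);	(always defers to a worker)
 *
 * In both cases the mbuf is consumed: on queue overflow it is freed and
 * ENOBUFS is returned.
 */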
#include "opt_ddb.h"
#include "opt_device_polling.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/interrupt.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/rmlock.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/vimage.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#include <net/if.h>
#include <net/if_var.h>
#include <net/netisr.h>

/*-
 * Synchronize use and modification of the registered netisr data structures;
 * acquire a write lock while modifying the set of registered protocols so
 * that partially registered or unregistered protocols are never run.
 *
 * The following data structures and fields are protected by this lock:
 *
 * - The np array, including all fields of struct netisr_proto.
 * - The nws array, including all fields of struct netisr_worker.
 * - The nws_array array.
 *
 * Note: the NETISR_LOCKING define controls whether read locks are acquired
 * in packet processing paths requiring netisr registration stability.  This
 * is disabled by default as it can lead to a measurable performance
 * degradation even with rmlocks (3%-6% for loopback ping-pong traffic), and
 * because netisr registration and unregistration is extremely rare at
 * runtime.  If it becomes more common, this decision should be revisited.
 *
 * XXXRW: rmlocks don't support assertions.
 */
static struct rmlock	netisr_rmlock;
#define	NETISR_LOCK_INIT()	rm_init_flags(&netisr_rmlock, "netisr", \
				    RM_NOWITNESS)
#define	NETISR_LOCK_ASSERT()
#define	NETISR_RLOCK(tracker)	rm_rlock(&netisr_rmlock, (tracker))
#define	NETISR_RUNLOCK(tracker)	rm_runlock(&netisr_rmlock, (tracker))
#define	NETISR_WLOCK()		rm_wlock(&netisr_rmlock)
#define	NETISR_WUNLOCK()	rm_wunlock(&netisr_rmlock)
/* #define	NETISR_LOCKING */
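
/*
 * Example (illustrative): with NETISR_LOCKING defined, packet paths that
 * require registration stability take the read lock around handler lookup
 * using a stack-allocated tracker, as the query functions below do:
 *
 *	struct rm_priotracker tracker;
 *
 *	NETISR_RLOCK(&tracker);
 *	(look up np[proto] and run or queue the work)
 *	NETISR_RUNLOCK(&tracker);
 */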

SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr");

/*-
 * Three direct dispatch policies are supported:
 *
 * - Always defer: all work is scheduled for a netisr, regardless of context.
 *   (!direct)
 *
 * - Hybrid: if the executing context allows direct dispatch, and we're
 *   running on the CPU the work would be done on, then direct dispatch if it
 *   wouldn't violate ordering constraints on the workstream.
 *   (direct && !direct_force)
 *
 * - Always direct: if the executing context allows direct dispatch, always
 *   direct dispatch.  (direct && direct_force)
 *
 * Notice that changing the global policy could lead to short periods of
 * misordered processing, but this is considered acceptable as compared to
 * the complexity of enforcing ordering during policy changes.
 */
static int	netisr_direct_force = 1;	/* Always direct dispatch. */
TUNABLE_INT("net.isr.direct_force", &netisr_direct_force);
SYSCTL_INT(_net_isr, OID_AUTO, direct_force, CTLFLAG_RW,
    &netisr_direct_force, 0, "Force direct dispatch");

static int	netisr_direct = 1;	/* Enable direct dispatch. */
TUNABLE_INT("net.isr.direct", &netisr_direct);
SYSCTL_INT(_net_isr, OID_AUTO, direct, CTLFLAG_RW,
    &netisr_direct, 0, "Enable direct dispatch");
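
/*
 * Example (illustrative): the three policies map onto the two knobs as
 * follows; both are tunables and remain writable at runtime via sysctl(8):
 *
 *	sysctl net.isr.direct=0				(always defer)
 *	sysctl net.isr.direct=1 net.isr.direct_force=0	(hybrid)
 *	sysctl net.isr.direct=1 net.isr.direct_force=1	(always direct)
 */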

/*
 * Allow the administrator to limit the number of threads (CPUs) to use for
 * netisr.  We don't check netisr_maxthreads before creating the thread for
 * CPU 0, so in practice we ignore values <= 1.  This must be set at boot.
 * We will create at most one thread per CPU.
 */
static int	netisr_maxthreads = -1;		/* Max number of threads. */
TUNABLE_INT("net.isr.maxthreads", &netisr_maxthreads);
SYSCTL_INT(_net_isr, OID_AUTO, maxthreads, CTLFLAG_RD,
    &netisr_maxthreads, 0,
    "Use at most this many CPUs for netisr processing");

static int	netisr_bindthreads = 0;		/* Bind threads to CPUs. */
TUNABLE_INT("net.isr.bindthreads", &netisr_bindthreads);
SYSCTL_INT(_net_isr, OID_AUTO, bindthreads, CTLFLAG_RD,
    &netisr_bindthreads, 0, "Bind netisr threads to CPUs.");

/*
 * Limit per-workstream queues to at most net.isr.maxqlimit, both for initial
 * configuration and later modification using netisr_setqlimit().
 */
#define	NETISR_DEFAULT_MAXQLIMIT	10240
static u_int	netisr_maxqlimit = NETISR_DEFAULT_MAXQLIMIT;
TUNABLE_INT("net.isr.maxqlimit", &netisr_maxqlimit);
SYSCTL_INT(_net_isr, OID_AUTO, maxqlimit, CTLFLAG_RD,
    &netisr_maxqlimit, 0,
    "Maximum netisr per-protocol, per-CPU queue depth.");

/*
 * The default per-workstream queue limit for protocols that don't initialize
 * the nh_qlimit field of their struct netisr_handler.  If this is set above
 * netisr_maxqlimit, we truncate it to the maximum during boot.
 */
#define	NETISR_DEFAULT_DEFAULTQLIMIT	256
static u_int	netisr_defaultqlimit = NETISR_DEFAULT_DEFAULTQLIMIT;
TUNABLE_INT("net.isr.defaultqlimit", &netisr_defaultqlimit);
SYSCTL_INT(_net_isr, OID_AUTO, defaultqlimit, CTLFLAG_RD,
    &netisr_defaultqlimit, 0,
    "Default netisr per-protocol, per-CPU queue limit if not set by protocol");

/*
 * Each protocol is described by a struct netisr_proto, which holds all
 * global per-protocol information.  This data structure is set up by
 * netisr_register(), and derived from the public struct netisr_handler.
 */
struct netisr_proto {
	const char	*np_name;	/* Character string protocol name. */
	netisr_handler_t *np_handler;	/* Protocol handler. */
	netisr_m2flow_t	*np_m2flow;	/* Query flow for untagged packet. */
	netisr_m2cpuid_t *np_m2cpuid;	/* Query CPU to process packet on. */
	netisr_drainedcpu_t *np_drainedcpu; /* Callback when a queue is drained. */
	u_int		 np_qlimit;	/* Maximum per-CPU queue depth. */
	u_int		 np_policy;	/* Work placement policy. */
};
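
/*
 * Example (illustrative): a protocol describes itself to netisr with the
 * public struct netisr_handler from netisr.h; IP might register as:
 *
 *	static struct netisr_handler ip_nh = {
 *		.nh_name = "ip",
 *		.nh_handler = ip_input,
 *		.nh_proto = NETISR_IP,
 *		.nh_policy = NETISR_POLICY_FLOW,
 *	};
 *
 *	netisr_register(&ip_nh);
 *
 * Leaving nh_qlimit at 0 selects net.isr.defaultqlimit.
 */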

#define	NETISR_MAXPROT		16		/* Compile-time limit. */

/*
 * The np array describes all registered protocols, indexed by protocol
 * number.
 */
static struct netisr_proto	np[NETISR_MAXPROT];

/*
 * Protocol-specific work for each workstream is described by struct
 * netisr_work.  Each work descriptor consists of an mbuf queue and
 * statistics.
 */
struct netisr_work {
	/*
	 * Packet queue, linked by m_nextpkt.
	 */
	struct mbuf	*nw_head;
	struct mbuf	*nw_tail;
	u_int		 nw_len;
	u_int		 nw_qlimit;
	u_int		 nw_watermark;

	/*
	 * Statistics -- written unlocked, but mostly from curcpu.
	 */
	u_int64_t	 nw_dispatched;	/* Number of direct dispatches. */
	u_int64_t	 nw_hybrid_dispatched; /* "" hybrid dispatches. */
	u_int64_t	 nw_qdrops;	/* "" drops. */
	u_int64_t	 nw_queued;	/* "" enqueues. */
	u_int64_t	 nw_handled;	/* "" handled in worker. */
};

/*
 * Workstreams hold a set of ordered work across each protocol, and are
 * described by netisr_workstream.  Each workstream is associated with a
 * worker thread, which in turn is pinned to a CPU.  Work associated with a
 * workstream can be processed in other threads during direct dispatch;
 * concurrent processing is prevented by the NWS_RUNNING flag, which
 * indicates that a thread is already processing the work queue.
 */
struct netisr_workstream {
	struct intr_event *nws_intr_event;	/* Handler for stream. */
	void		*nws_swi_cookie;	/* swi(9) cookie for stream. */
	struct mtx	 nws_mtx;		/* Synchronize work. */
	u_int		 nws_cpu;		/* CPU pinning. */
	u_int		 nws_flags;		/* Wakeup flags. */
	u_int		 nws_pendingbits;	/* Scheduled protocols. */

	/*
	 * Each protocol has per-workstream data.
	 */
	struct netisr_work	nws_work[NETISR_MAXPROT];
} __aligned(CACHE_LINE_SIZE);

/*
 * Per-CPU workstream data.
 */
DPCPU_DEFINE(struct netisr_workstream, nws);

/*
 * Map contiguous values between 0 and nws_count into CPU IDs appropriate for
 * accessing workstreams.  This allows constructions of the form
 * DPCPU_ID_GET(nws_array[arbitraryvalue % nws_count], nws).
 */
static u_int				 nws_array[MAXCPU];

/*
 * Number of registered workstreams.  Will be at most the number of running
 * CPUs once fully started.
 */
static u_int				 nws_count;
SYSCTL_INT(_net_isr, OID_AUTO, numthreads, CTLFLAG_RD,
    &nws_count, 0, "Number of extant netisr threads.");

/*
 * Per-workstream flags.
 */
#define	NWS_RUNNING	0x00000001	/* Currently running in a thread. */
#define	NWS_DISPATCHING	0x00000002	/* Currently being direct-dispatched. */
#define	NWS_SCHEDULED	0x00000004	/* Signal issued. */

/*
 * Synchronization for each workstream: a mutex protects all mutable fields
 * in each stream, including per-protocol state (mbuf queues).  The SWI is
 * woken up if asynchronous dispatch is required.
 */
#define	NWS_LOCK(s)		mtx_lock(&(s)->nws_mtx)
#define	NWS_LOCK_ASSERT(s)	mtx_assert(&(s)->nws_mtx, MA_OWNED)
#define	NWS_UNLOCK(s)		mtx_unlock(&(s)->nws_mtx)
#define	NWS_SIGNAL(s)		swi_sched((s)->nws_swi_cookie, 0)

/*
 * Utility routines for protocols that implement their own mapping of flows
 * to CPUs.
 */
u_int
netisr_get_cpucount(void)
{

	return (nws_count);
}

u_int
netisr_get_cpuid(u_int cpunumber)
{

	KASSERT(cpunumber < nws_count, ("%s: %u > %u", __func__, cpunumber,
	    nws_count));

	return (nws_array[cpunumber]);
}

/*
 * The default implementation of flow -> CPU ID mapping.
 *
 * Non-static so that protocols can use it to map their own work to specific
 * CPUs in a manner consistent with netisr for affinity purposes.
 */
u_int
netisr_default_flow2cpu(u_int flowid)
{

	return (nws_array[flowid % nws_count]);
}
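
/*
 * Example (illustrative): a NETISR_POLICY_CPU protocol's nh_m2cpuid method
 * can reuse these routines so that its placement matches netisr's own
 * (hypothetical foo_m2cpuid; assumes a valid flow ID in the packet header):
 *
 *	static struct mbuf *
 *	foo_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuidp)
 *	{
 *
 *		*cpuidp = netisr_default_flow2cpu(m->m_pkthdr.flowid);
 *		return (m);
 *	}
 */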

/*
 * Register a new netisr handler, which requires initializing per-protocol
 * fields for each workstream.  All netisr work is briefly suspended while
 * the protocol is installed.
 */
void
netisr_register(const struct netisr_handler *nhp)
{
	struct netisr_work *npwp;
	const char *name;
	u_int i, proto;

	proto = nhp->nh_proto;
	name = nhp->nh_name;

	/*
	 * Test that the requested registration is valid.
	 */
	KASSERT(nhp->nh_name != NULL,
	    ("%s: nh_name NULL for %u", __func__, proto));
	KASSERT(nhp->nh_handler != NULL,
	    ("%s: nh_handler NULL for %s", __func__, name));
	KASSERT(nhp->nh_policy == NETISR_POLICY_SOURCE ||
	    nhp->nh_policy == NETISR_POLICY_FLOW ||
	    nhp->nh_policy == NETISR_POLICY_CPU,
	    ("%s: unsupported nh_policy %u for %s", __func__,
	    nhp->nh_policy, name));
	KASSERT(nhp->nh_policy == NETISR_POLICY_FLOW ||
	    nhp->nh_m2flow == NULL,
	    ("%s: nh_policy != FLOW but m2flow defined for %s", __func__,
	    name));
	KASSERT(nhp->nh_policy == NETISR_POLICY_CPU || nhp->nh_m2cpuid == NULL,
	    ("%s: nh_policy != CPU but m2cpuid defined for %s", __func__,
	    name));
	KASSERT(nhp->nh_policy != NETISR_POLICY_CPU || nhp->nh_m2cpuid != NULL,
	    ("%s: nh_policy == CPU but m2cpuid not defined for %s", __func__,
	    name));
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u, %s): protocol too big", __func__, proto, name));

	/*
	 * Test that no existing registration exists for this protocol.
	 */
	NETISR_WLOCK();
	KASSERT(np[proto].np_name == NULL,
	    ("%s(%u, %s): name present", __func__, proto, name));
	KASSERT(np[proto].np_handler == NULL,
	    ("%s(%u, %s): handler present", __func__, proto, name));

	np[proto].np_name = name;
	np[proto].np_handler = nhp->nh_handler;
	np[proto].np_m2flow = nhp->nh_m2flow;
	np[proto].np_m2cpuid = nhp->nh_m2cpuid;
	np[proto].np_drainedcpu = nhp->nh_drainedcpu;
	if (nhp->nh_qlimit == 0)
		np[proto].np_qlimit = netisr_defaultqlimit;
	else if (nhp->nh_qlimit > netisr_maxqlimit) {
		printf("%s: %s requested queue limit %u capped to "
		    "net.isr.maxqlimit %u\n", __func__, name, nhp->nh_qlimit,
		    netisr_maxqlimit);
		np[proto].np_qlimit = netisr_maxqlimit;
	} else
		np[proto].np_qlimit = nhp->nh_qlimit;
	np[proto].np_policy = nhp->nh_policy;
	for (i = 0; i <= mp_maxid; i++) {
		if (CPU_ABSENT(i))
			continue;
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		bzero(npwp, sizeof(*npwp));
		npwp->nw_qlimit = np[proto].np_qlimit;
	}
	NETISR_WUNLOCK();
}

/*
 * Clear drop counters across all workstreams for a protocol.
 */
void
netisr_clearqdrops(const struct netisr_handler *nhp)
{
	struct netisr_work *npwp;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_WLOCK();
	KASSERT(np[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	for (i = 0; i <= mp_maxid; i++) {
		if (CPU_ABSENT(i))
			continue;
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		npwp->nw_qdrops = 0;
	}
	NETISR_WUNLOCK();
}

/*
 * Query the current drop counters across all workstreams for a protocol.
 */
void
netisr_getqdrops(const struct netisr_handler *nhp, u_int64_t *qdropp)
{
	struct netisr_work *npwp;
	struct rm_priotracker tracker;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	*qdropp = 0;
	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_RLOCK(&tracker);
	KASSERT(np[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	for (i = 0; i <= mp_maxid; i++) {
		if (CPU_ABSENT(i))
			continue;
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		*qdropp += npwp->nw_qdrops;
	}
	NETISR_RUNLOCK(&tracker);
}

/*
 * Query the current queue limit for per-workstream queues for a protocol.
 */
void
netisr_getqlimit(const struct netisr_handler *nhp, u_int *qlimitp)
{
	struct rm_priotracker tracker;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int proto;

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_RLOCK(&tracker);
	KASSERT(np[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));
	*qlimitp = np[proto].np_qlimit;
	NETISR_RUNLOCK(&tracker);
}

/*
 * Update the queue limit across per-workstream queues for a protocol.  We
 * simply change the limits, and don't drain overflowed packets as they will
 * (hopefully) take care of themselves shortly.
 */
int
netisr_setqlimit(const struct netisr_handler *nhp, u_int qlimit)
{
	struct netisr_work *npwp;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	if (qlimit > netisr_maxqlimit)
		return (EINVAL);

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_WLOCK();
	KASSERT(np[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	np[proto].np_qlimit = qlimit;
	for (i = 0; i <= mp_maxid; i++) {
		if (CPU_ABSENT(i))
			continue;
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		npwp->nw_qlimit = qlimit;
	}
	NETISR_WUNLOCK();
	return (0);
}
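
/*
 * Example (illustrative): runtime inspection and tuning for a registered
 * protocol, reusing the handler structure passed to netisr_register():
 *
 *	u_int64_t qdrops;
 *	u_int qlimit;
 *	int error;
 *
 *	netisr_getqdrops(&ip_nh, &qdrops);
 *	netisr_getqlimit(&ip_nh, &qlimit);
 *	error = netisr_setqlimit(&ip_nh, 2 * qlimit);
 *
 * (ip_nh is the hypothetical handler from the registration example above;
 * netisr_setqlimit() returns EINVAL for values above net.isr.maxqlimit.)
 */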

/*
 * Drain all packets currently held in a particular protocol work queue.
 */
static void
netisr_drain_proto(struct netisr_work *npwp)
{
	struct mbuf *m;

	/*
	 * We would assert the lock on the workstream but it's not passed in.
	 */
	while ((m = npwp->nw_head) != NULL) {
		npwp->nw_head = m->m_nextpkt;
		m->m_nextpkt = NULL;
		if (npwp->nw_head == NULL)
			npwp->nw_tail = NULL;
		npwp->nw_len--;
		m_freem(m);
	}
	KASSERT(npwp->nw_tail == NULL, ("%s: tail", __func__));
	KASSERT(npwp->nw_len == 0, ("%s: len", __func__));
}

/*
 * Remove the registration of a network protocol, which requires clearing
 * per-protocol fields across all workstreams, including freeing all mbufs in
 * the queues at time of unregister.  All work in netisr is briefly suspended
 * while this takes place.
 */
void
netisr_unregister(const struct netisr_handler *nhp)
{
	struct netisr_work *npwp;
#ifdef INVARIANTS
	const char *name;
#endif
	u_int i, proto;

	proto = nhp->nh_proto;
#ifdef INVARIANTS
	name = nhp->nh_name;
#endif
	KASSERT(proto < NETISR_MAXPROT,
	    ("%s(%u): protocol too big for %s", __func__, proto, name));

	NETISR_WLOCK();
	KASSERT(np[proto].np_handler != NULL,
	    ("%s(%u): protocol not registered for %s", __func__, proto,
	    name));

	np[proto].np_name = NULL;
	np[proto].np_handler = NULL;
	np[proto].np_m2flow = NULL;
	np[proto].np_m2cpuid = NULL;
	np[proto].np_drainedcpu = NULL;
	np[proto].np_qlimit = 0;
	np[proto].np_policy = 0;
	for (i = 0; i <= mp_maxid; i++) {
		if (CPU_ABSENT(i))
			continue;
		npwp = &(DPCPU_ID_PTR(i, nws))->nws_work[proto];
		netisr_drain_proto(npwp);
		bzero(npwp, sizeof(*npwp));
	}
	NETISR_WUNLOCK();
}

/*
 * Look up the workstream given a packet and source identifier.  Do this by
 * checking the protocol's policy, and optionally call out to the protocol
 * for assistance if required.
 */
static struct mbuf *
netisr_select_cpuid(struct netisr_proto *npp, uintptr_t source,
    struct mbuf *m, u_int *cpuidp)
{
	struct ifnet *ifp;

	NETISR_LOCK_ASSERT();

	/*
	 * In the event we have only one worker, shortcut and deliver to it
	 * without further ado.
	 */
	if (nws_count == 1) {
		*cpuidp = nws_array[0];
		return (m);
	}

	/*
	 * What happens next depends on the policy selected by the protocol.
	 * If we want to support per-interface policies, we should do that
	 * here first.
	 */
	switch (npp->np_policy) {
	case NETISR_POLICY_CPU:
		return (npp->np_m2cpuid(m, source, cpuidp));

	case NETISR_POLICY_FLOW:
		if (!(m->m_flags & M_FLOWID) && npp->np_m2flow != NULL) {
			m = npp->np_m2flow(m, source);
			if (m == NULL)
				return (NULL);
		}
		if (m->m_flags & M_FLOWID) {
			*cpuidp =
			    netisr_default_flow2cpu(m->m_pkthdr.flowid);
			return (m);
		}
		/* FALLTHROUGH */

	case NETISR_POLICY_SOURCE:
		ifp = m->m_pkthdr.rcvif;
		if (ifp != NULL)
			*cpuidp = nws_array[(ifp->if_index + source) %
			    nws_count];
		else
			*cpuidp = nws_array[source % nws_count];
		return (m);

	default:
		panic("%s: invalid policy %u for %s", __func__,
		    npp->np_policy, npp->np_name);
	}
}

/*
 * Process packets associated with a workstream and protocol.  For reasons of
 * fairness, we process up to one complete netisr queue at a time, moving the
 * queue to a stack-local queue for processing, but do not loop refreshing
 * from the global queue.  The caller is responsible for deciding whether to
 * loop, and for setting the NWS_RUNNING flag.  The passed workstream will be
 * locked on entry and relocked before return, but will be released while
 * processing.  The number of packets processed is returned.
 */
static u_int
netisr_process_workstream_proto(struct netisr_workstream *nwsp, u_int proto)
{
	struct netisr_work local_npw, *npwp;
	u_int handled;
	struct mbuf *m;

	NETISR_LOCK_ASSERT();
	NWS_LOCK_ASSERT(nwsp);

	KASSERT(nwsp->nws_flags & NWS_RUNNING,
	    ("%s(%u): not running", __func__, proto));
	KASSERT(proto >= 0 && proto < NETISR_MAXPROT,
	    ("%s(%u): invalid proto\n", __func__, proto));

	npwp = &nwsp->nws_work[proto];
	if (npwp->nw_len == 0)
		return (0);

	/*
	 * Move the global work queue to a thread-local work queue.
	 *
	 * Notice that this means the effective maximum length of the queue
	 * is actually twice that of the maximum queue length specified in
	 * the protocol registration call.
	 */
	handled = npwp->nw_len;
	local_npw = *npwp;
	npwp->nw_head = NULL;
	npwp->nw_tail = NULL;
	npwp->nw_len = 0;
	nwsp->nws_pendingbits &= ~(1 << proto);
	NWS_UNLOCK(nwsp);
	while ((m = local_npw.nw_head) != NULL) {
		local_npw.nw_head = m->m_nextpkt;
		m->m_nextpkt = NULL;
		if (local_npw.nw_head == NULL)
			local_npw.nw_tail = NULL;
		local_npw.nw_len--;
		VNET_ASSERT(m->m_pkthdr.rcvif != NULL);
		CURVNET_SET(m->m_pkthdr.rcvif->if_vnet);
		np[proto].np_handler(m);
		CURVNET_RESTORE();
	}
	KASSERT(local_npw.nw_len == 0,
	    ("%s(%u): len %u", __func__, proto, local_npw.nw_len));
	if (np[proto].np_drainedcpu)
		np[proto].np_drainedcpu(nwsp->nws_cpu);
	NWS_LOCK(nwsp);
	npwp->nw_handled += handled;
	return (handled);
}

/*
 * SWI handler for netisr -- processes packets in a set of workstreams that
 * it owns, woken up by calls to NWS_SIGNAL().  If this workstream is already
 * being direct dispatched, go back to sleep and wait for the dispatching
 * thread to wake us up again.
 */
static void
swi_net(void *arg)
{
#ifdef NETISR_LOCKING
	struct rm_priotracker tracker;
#endif
	struct netisr_workstream *nwsp;
	u_int bits, prot;

	nwsp = arg;

#ifdef DEVICE_POLLING
	KASSERT(nws_count == 1,
	    ("%s: device_polling but nws_count != 1", __func__));
	netisr_poll();
#endif
#ifdef NETISR_LOCKING
	NETISR_RLOCK(&tracker);
#endif
	NWS_LOCK(nwsp);
	KASSERT(!(nwsp->nws_flags & NWS_RUNNING), ("swi_net: running"));
	if (nwsp->nws_flags & NWS_DISPATCHING)
		goto out;
	nwsp->nws_flags |= NWS_RUNNING;
	nwsp->nws_flags &= ~NWS_SCHEDULED;
	while ((bits = nwsp->nws_pendingbits) != 0) {
		while ((prot = ffs(bits)) != 0) {
			prot--;
			bits &= ~(1 << prot);
			(void)netisr_process_workstream_proto(nwsp, prot);
		}
	}
	nwsp->nws_flags &= ~NWS_RUNNING;
out:
	NWS_UNLOCK(nwsp);
#ifdef NETISR_LOCKING
	NETISR_RUNLOCK(&tracker);
#endif
#ifdef DEVICE_POLLING
	netisr_pollmore();
#endif
}

static int
netisr_queue_workstream(struct netisr_workstream *nwsp, u_int proto,
    struct netisr_work *npwp, struct mbuf *m, int *dosignalp)
{

	NWS_LOCK_ASSERT(nwsp);

	*dosignalp = 0;
	if (npwp->nw_len < npwp->nw_qlimit) {
		m->m_nextpkt = NULL;
		if (npwp->nw_head == NULL) {
			npwp->nw_head = m;
			npwp->nw_tail = m;
		} else {
			npwp->nw_tail->m_nextpkt = m;
			npwp->nw_tail = m;
		}
		npwp->nw_len++;
		if (npwp->nw_len > npwp->nw_watermark)
			npwp->nw_watermark = npwp->nw_len;
		nwsp->nws_pendingbits |= (1 << proto);
		if (!(nwsp->nws_flags &
		    (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED))) {
			nwsp->nws_flags |= NWS_SCHEDULED;
			*dosignalp = 1;	/* Defer until unlocked. */
		}
		npwp->nw_queued++;
		return (0);
	} else {
		m_freem(m);
		npwp->nw_qdrops++;
		return (ENOBUFS);
	}
}

static int
netisr_queue_internal(u_int proto, struct mbuf *m, u_int cpuid)
{
	struct netisr_workstream *nwsp;
	struct netisr_work *npwp;
	int dosignal, error;

#ifdef NETISR_LOCKING
	NETISR_LOCK_ASSERT();
#endif
	KASSERT(cpuid <= mp_maxid, ("%s: cpuid too big (%u, %u)", __func__,
	    cpuid, mp_maxid));
	KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));

	dosignal = 0;
	error = 0;
	nwsp = DPCPU_ID_PTR(cpuid, nws);
	npwp = &nwsp->nws_work[proto];
	NWS_LOCK(nwsp);
	error = netisr_queue_workstream(nwsp, proto, npwp, m, &dosignal);
	NWS_UNLOCK(nwsp);
	if (dosignal)
		NWS_SIGNAL(nwsp);
	return (error);
}

int
netisr_queue_src(u_int proto, uintptr_t source, struct mbuf *m)
{
#ifdef NETISR_LOCKING
	struct rm_priotracker tracker;
#endif
	u_int cpuid;
	int error;

	KASSERT(proto < NETISR_MAXPROT,
	    ("%s: invalid proto %u", __func__, proto));

#ifdef NETISR_LOCKING
	NETISR_RLOCK(&tracker);
#endif
	KASSERT(np[proto].np_handler != NULL,
	    ("%s: invalid proto %u", __func__, proto));

	m = netisr_select_cpuid(&np[proto], source, m, &cpuid);
	if (m != NULL) {
		KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__,
		    cpuid));
		error = netisr_queue_internal(proto, m, cpuid);
	} else
		error = ENOBUFS;
#ifdef NETISR_LOCKING
	NETISR_RUNLOCK(&tracker);
#endif
	return (error);
}

int
netisr_queue(u_int proto, struct mbuf *m)
{

	return (netisr_queue_src(proto, 0, m));
}
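
/*
 * Example (illustrative): the _src variants let callers distinguish
 * sub-sources within a protocol; under NETISR_POLICY_SOURCE the opaque
 * identifier participates in workstream selection alongside the receive
 * interface:
 *
 *	error = netisr_queue_src(NETISR_IP, (uintptr_t)tunnel_id, m);
 *
 * (tunnel_id is a hypothetical per-tunnel discriminator; plain
 * netisr_queue() passes 0.)
 */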

/*
 * Dispatch a packet for netisr processing, direct dispatch permitted by
 * calling context.
 */
int
netisr_dispatch_src(u_int proto, uintptr_t source, struct mbuf *m)
{
#ifdef NETISR_LOCKING
	struct rm_priotracker tracker;
#endif
	struct netisr_workstream *nwsp;
	struct netisr_work *npwp;
	int dosignal, error;
	u_int cpuid;

	/*
	 * If direct dispatch is entirely disabled, fall back on queueing.
	 */
	if (!netisr_direct)
		return (netisr_queue_src(proto, source, m));

	KASSERT(proto < NETISR_MAXPROT,
	    ("%s: invalid proto %u", __func__, proto));
#ifdef NETISR_LOCKING
	NETISR_RLOCK(&tracker);
#endif
	KASSERT(np[proto].np_handler != NULL,
	    ("%s: invalid proto %u", __func__, proto));

	/*
	 * If direct dispatch is forced, then unconditionally dispatch
	 * without a formal CPU selection.  Borrow the current CPU's stats,
	 * even if there's no worker on it.  In this case we don't update
	 * nws_flags because all netisr processing will be source ordered due
	 * to always being forced to directly dispatch.
	 */
	if (netisr_direct_force) {
		nwsp = DPCPU_PTR(nws);
		npwp = &nwsp->nws_work[proto];
		npwp->nw_dispatched++;
		npwp->nw_handled++;
		np[proto].np_handler(m);
		error = 0;
		goto out_unlock;
	}

	/*
	 * Otherwise, we execute in a hybrid mode where we will try to direct
	 * dispatch if we're on the right CPU and the netisr worker isn't
	 * already running.
	 */
	m = netisr_select_cpuid(&np[proto], source, m, &cpuid);
	if (m == NULL) {
		error = ENOBUFS;
		goto out_unlock;
	}
	KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));
	sched_pin();
	if (cpuid != curcpu)
		goto queue_fallback;
	nwsp = DPCPU_PTR(nws);
	npwp = &nwsp->nws_work[proto];

	/*-
	 * We are willing to direct dispatch only if three conditions hold:
	 *
	 * (1) The netisr worker isn't already running,
	 * (2) Another thread isn't already directly dispatching, and
	 * (3) The netisr hasn't already been woken up.
	 */
	NWS_LOCK(nwsp);
	if (nwsp->nws_flags & (NWS_RUNNING | NWS_DISPATCHING | NWS_SCHEDULED)) {
		error = netisr_queue_workstream(nwsp, proto, npwp, m,
		    &dosignal);
		NWS_UNLOCK(nwsp);
		if (dosignal)
			NWS_SIGNAL(nwsp);
		goto out_unpin;
	}

	/*
	 * The current thread is now effectively the netisr worker, so set
	 * the dispatching flag to prevent concurrent processing of the
	 * stream from another thread (even the netisr worker), which could
	 * otherwise lead to effective misordering of the stream.
	 */
	nwsp->nws_flags |= NWS_DISPATCHING;
	NWS_UNLOCK(nwsp);
	np[proto].np_handler(m);
	NWS_LOCK(nwsp);
	nwsp->nws_flags &= ~NWS_DISPATCHING;
	npwp->nw_handled++;
	npwp->nw_hybrid_dispatched++;

	/*
	 * If other work was enqueued by another thread while we were direct
	 * dispatching, we need to signal the netisr worker to do that work.
	 * In the future, we might want to do some of that work in the
	 * current thread, rather than trigger further context switches.  If
	 * so, we'll want to establish a reasonable bound on the work done in
	 * the "borrowed" context.
	 */
	if (nwsp->nws_pendingbits != 0) {
		nwsp->nws_flags |= NWS_SCHEDULED;
		dosignal = 1;
	} else
		dosignal = 0;
	NWS_UNLOCK(nwsp);
	if (dosignal)
		NWS_SIGNAL(nwsp);
	error = 0;
	goto out_unpin;

queue_fallback:
	error = netisr_queue_internal(proto, m, cpuid);
out_unpin:
	sched_unpin();
out_unlock:
#ifdef NETISR_LOCKING
	NETISR_RUNLOCK(&tracker);
#endif
	return (error);
}

int
netisr_dispatch(u_int proto, struct mbuf *m)
{

	return (netisr_dispatch_src(proto, 0, m));
}

#ifdef DEVICE_POLLING
/*
 * Kernel polling borrows a netisr thread to run interface polling in; this
 * function allows kernel polling to request that the netisr thread be
 * scheduled even if no packets are pending for protocols.
 */
void
netisr_sched_poll(void)
{
	struct netisr_workstream *nwsp;

	nwsp = DPCPU_ID_PTR(nws_array[0], nws);
	NWS_SIGNAL(nwsp);
}
#endif

static void
netisr_start_swi(u_int cpuid, struct pcpu *pc)
{
	char swiname[12];
	struct netisr_workstream *nwsp;
	int error;

	KASSERT(!CPU_ABSENT(cpuid), ("%s: CPU %u absent", __func__, cpuid));

	nwsp = DPCPU_ID_PTR(cpuid, nws);
	mtx_init(&nwsp->nws_mtx, "netisr_mtx", NULL, MTX_DEF);
	nwsp->nws_cpu = cpuid;
	snprintf(swiname, sizeof(swiname), "netisr %u", cpuid);
	error = swi_add(&nwsp->nws_intr_event, swiname, swi_net, nwsp,
	    SWI_NET, INTR_MPSAFE, &nwsp->nws_swi_cookie);
	if (error)
		panic("%s: swi_add %d", __func__, error);
	pc->pc_netisr = nwsp->nws_intr_event;
	if (netisr_bindthreads) {
		error = intr_event_bind(nwsp->nws_intr_event, cpuid);
		if (error != 0)
			printf("%s: cpu %u: intr_event_bind: %d", __func__,
			    cpuid, error);
	}
	NETISR_WLOCK();
	nws_array[nws_count] = nwsp->nws_cpu;
	nws_count++;
	NETISR_WUNLOCK();
}
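
/*
 * Example (illustrative): the worker count and binding policy consumed by
 * the initialization code below are boot-time tunables (read-only sysctls
 * at runtime), set for example from loader.conf(5):
 *
 *	net.isr.maxthreads=4
 *	net.isr.bindthreads=1
 */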

/*
 * Initialize the netisr subsystem.  We rely on BSS and static initialization
 * of most fields in global data structures.
 *
 * Start a worker thread for the boot CPU so that we can support network
 * traffic immediately in case the network stack is used before additional
 * CPUs are started (for example, diskless boot).
 */
static void
netisr_init(void *arg)
{

	KASSERT(curcpu == 0, ("%s: not on CPU 0", __func__));

	NETISR_LOCK_INIT();
	if (netisr_maxthreads < 1)
		netisr_maxthreads = 1;
	if (netisr_maxthreads > mp_ncpus) {
		printf("netisr2: forcing maxthreads from %d to %d\n",
		    netisr_maxthreads, mp_ncpus);
		netisr_maxthreads = mp_ncpus;
	}
	if (netisr_defaultqlimit > netisr_maxqlimit) {
		printf("netisr2: forcing defaultqlimit from %d to %d\n",
		    netisr_defaultqlimit, netisr_maxqlimit);
		netisr_defaultqlimit = netisr_maxqlimit;
	}
#ifdef DEVICE_POLLING
	/*
	 * The device polling code is not yet aware of how to deal with
	 * multiple netisr threads, so for the time being compiling in device
	 * polling disables parallel netisr workers.
	 */
	if (netisr_maxthreads != 1 || netisr_bindthreads != 0) {
		printf("netisr2: forcing maxthreads to 1 and bindthreads to "
		    "0 for device polling\n");
		netisr_maxthreads = 1;
		netisr_bindthreads = 0;
	}
#endif

	netisr_start_swi(curcpu, pcpu_find(curcpu));
}
SYSINIT(netisr_init, SI_SUB_SOFTINTR, SI_ORDER_FIRST, netisr_init, NULL);

/*
 * Start worker threads for additional CPUs.  No attempt is made to
 * gracefully handle work reassignment; we don't yet support dynamic
 * reconfiguration.
 */
static void
netisr_start(void *arg)
{
	struct pcpu *pc;

	SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
		if (nws_count >= netisr_maxthreads)
			break;
		/* XXXRW: Is skipping absent CPUs still required here? */
		if (CPU_ABSENT(pc->pc_cpuid))
			continue;
		/* Worker will already be present for boot CPU. */
		if (pc->pc_netisr != NULL)
			continue;
		netisr_start_swi(pc->pc_cpuid, pc);
	}
}
SYSINIT(netisr_start, SI_SUB_SMP, SI_ORDER_MIDDLE, netisr_start, NULL);

#ifdef DDB
DB_SHOW_COMMAND(netisr, db_show_netisr)
{
	struct netisr_workstream *nwsp;
	struct netisr_work *nwp;
	int first, proto;
	u_int cpuid;

	db_printf("%3s %6s %5s %5s %5s %8s %8s %8s %8s\n", "CPU", "Proto",
	    "Len", "WMark", "Max", "Disp", "HDisp", "Drop", "Queue");
	for (cpuid = 0; cpuid <= mp_maxid; cpuid++) {
		if (CPU_ABSENT(cpuid))
			continue;
		nwsp = DPCPU_ID_PTR(cpuid, nws);
		if (nwsp->nws_intr_event == NULL)
			continue;
		first = 1;
		for (proto = 0; proto < NETISR_MAXPROT; proto++) {
			if (np[proto].np_handler == NULL)
				continue;
			nwp = &nwsp->nws_work[proto];
			if (first) {
				db_printf("%3d ", cpuid);
				first = 0;
			} else
				db_printf("%3s ", "");
			db_printf(
			    "%6s %5d %5d %5d %8ju %8ju %8ju %8ju\n",
			    np[proto].np_name, nwp->nw_len,
			    nwp->nw_watermark, nwp->nw_qlimit,
			    nwp->nw_dispatched, nwp->nw_hybrid_dispatched,
			    nwp->nw_qdrops, nwp->nw_queued);
		}
	}
}
#endif