/*-
 * Copyright (c) 2001,2002,2003 Jonathan Lemon <jlemon@FreeBSD.org>
 * Copyright (c) 1997, Stefan Esser <se@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include "opt_device_polling.h"
#include "opt_net.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/rtprio.h>
#include <sys/systm.h>
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/random.h>
#include <sys/resourcevar.h>
#include <sys/sysctl.h>
#include <sys/unistd.h>
#include <machine/atomic.h>
#include <machine/cpu.h>
#include <machine/stdarg.h>

#include <sys/mbuf.h>
#include <sys/socket.h>

#include <net/if.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/netisr.h>

/*
 * debug_mpsafenet controls network subsystem-wide use of the Giant lock,
 * from system calls down to interrupt handlers.  It can be changed only via
 * a tunable at boot, not at run-time, due to the complexity of unwinding.
 * The compiled default is set via a kernel option; right now, the default
 * unless otherwise specified is to run the network stack without Giant.
 */
#ifdef NET_WITH_GIANT
int	debug_mpsafenet = 0;
#else
int	debug_mpsafenet = 1;
#endif
int	debug_mpsafenet_toolatetotwiddle = 0;

TUNABLE_INT("debug.mpsafenet", &debug_mpsafenet);
SYSCTL_INT(_debug, OID_AUTO, mpsafenet, CTLFLAG_RD, &debug_mpsafenet, 0,
    "Enable/disable MPSAFE network support");
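
/*
 * Illustrative note (not part of the original file): since the sysctl above
 * is read-only (CTLFLAG_RD), the only supported ways to run the stack under
 * Giant are the boot-time tunable or the kernel option, e.g.
 *
 *	# /boot/loader.conf
 *	debug.mpsafenet="0"
 *
 * or building the kernel with "options NET_WITH_GIANT".
 */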

volatile unsigned int	netisr;	/* scheduling bits for network */

struct netisr {
	netisr_t	*ni_handler;
	struct ifqueue	*ni_queue;
	int		ni_flags;
} netisrs[32];

static void *net_ih;

/*
 * Not all network code is currently capable of running MPSAFE; however,
 * most of it is.  Since those sections that are not are generally optional
 * components not shipped with default kernels, we provide a basic way to
 * determine whether MPSAFE operation is permitted: based on a default of
 * yes, we permit non-MPSAFE components to use a registration call to
 * identify that they require Giant.  If the system is still early in the
 * boot process, we change the debug_mpsafenet setting to choose a
 * non-MPSAFE (degraded) execution mode.  If it's too late for that (since
 * the setting cannot be changed at run time), we generate a console warning
 * that the configuration may be unsafe.
 */
static int mpsafe_warn_count;

/*
 * Function call implementing registration of a non-MPSAFE network component.
 */
void
net_warn_not_mpsafe(const char *component)
{

	/*
	 * If we're running with Giant over the network stack, there is no
	 * problem.
	 */
	if (!debug_mpsafenet)
		return;

	/*
	 * If it's not too late to change the MPSAFE setting for the network
	 * stack, do so now.  This effectively suppresses warnings by
	 * components registering later.
	 */
	if (!debug_mpsafenet_toolatetotwiddle) {
		debug_mpsafenet = 0;
		printf("WARNING: debug.mpsafenet forced to 0 as %s requires "
		    "Giant\n", component);
		return;
	}

	/*
	 * We must run without Giant, so generate a console warning with
	 * some information about what to do about it.  The system may be
	 * operating unsafely, however.
	 */
	printf("WARNING: Network stack Giant-free, but %s requires Giant.\n",
	    component);
	if (mpsafe_warn_count == 0)
		printf(" Consider adding 'options NET_WITH_GIANT' or "
		    "setting debug.mpsafenet=0\n");
	mpsafe_warn_count++;
}

/*
 * This sysinit is run after any pre-loaded or compiled-in components have
 * announced that they require Giant, but before any modules loaded at
 * run-time.
 */
static void
net_mpsafe_toolate(void *arg)
{

	debug_mpsafenet_toolatetotwiddle = 1;

	if (!debug_mpsafenet)
		printf("WARNING: MPSAFE network stack disabled, expect "
		    "reduced performance.\n");
}

SYSINIT(net_mpsafe_toolate, SI_SUB_SETTINGS, SI_ORDER_ANY, net_mpsafe_toolate,
    NULL);

void
legacy_setsoftnet(void)
{
	swi_sched(net_ih, 0);
}

void
netisr_register(int num, netisr_t *handler, struct ifqueue *inq, int flags)
{

	KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))),
	    ("bad isr %d", num));
	netisrs[num].ni_handler = handler;
	netisrs[num].ni_queue = inq;
	if ((flags & NETISR_MPSAFE) && !debug_mpsafenet)
		flags &= ~NETISR_MPSAFE;
	netisrs[num].ni_flags = flags;
}

void
netisr_unregister(int num)
{
	struct netisr *ni;

	KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))),
	    ("bad isr %d", num));
	ni = &netisrs[num];
	ni->ni_handler = NULL;
	if (ni->ni_queue != NULL)
		IF_DRAIN(ni->ni_queue);
	ni->ni_queue = NULL;
}
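
/*
 * Illustrative sketch (not part of the original file): a protocol's init
 * routine typically sets up its input queue and registers its handler once
 * at boot.  "NETISR_FOO", "foo_input", and "foointrq" below are hypothetical
 * names standing in for a real protocol:
 *
 *	static struct ifqueue foointrq;
 *
 *	foointrq.ifq_maxlen = IFQ_MAXLEN;
 *	mtx_init(&foointrq.ifq_mtx, "foo_inq", NULL, MTX_DEF);
 *	netisr_register(NETISR_FOO, foo_input, &foointrq, NETISR_MPSAFE);
 *
 * Note that netisr_register() silently clears NETISR_MPSAFE when the stack
 * is running under Giant (debug_mpsafenet == 0).
 */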

struct isrstat {
	int	isrs_count;		/* dispatch count */
	int	isrs_directed;		/* ...directly dispatched */
	int	isrs_deferred;		/* ...queued instead */
	int	isrs_queued;		/* intentionally queued */
	int	isrs_drop;		/* dropped because no handler */
	int	isrs_swi_count;		/* swi_net handlers called */
};
static struct isrstat isrstat;

SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr counters");

static int	netisr_direct = 1;
SYSCTL_INT(_net_isr, OID_AUTO, direct, CTLFLAG_RW,
    &netisr_direct, 0, "enable direct dispatch");
TUNABLE_INT("net.isr.direct", &netisr_direct);

SYSCTL_INT(_net_isr, OID_AUTO, count, CTLFLAG_RD,
    &isrstat.isrs_count, 0, "");
SYSCTL_INT(_net_isr, OID_AUTO, directed, CTLFLAG_RD,
    &isrstat.isrs_directed, 0, "");
SYSCTL_INT(_net_isr, OID_AUTO, deferred, CTLFLAG_RD,
    &isrstat.isrs_deferred, 0, "");
SYSCTL_INT(_net_isr, OID_AUTO, queued, CTLFLAG_RD,
    &isrstat.isrs_queued, 0, "");
SYSCTL_INT(_net_isr, OID_AUTO, drop, CTLFLAG_RD,
    &isrstat.isrs_drop, 0, "");
SYSCTL_INT(_net_isr, OID_AUTO, swi_count, CTLFLAG_RD,
    &isrstat.isrs_swi_count, 0, "");

/*
 * Process all packets currently present in a netisr queue.  Used to
 * drain an existing set of packets waiting for processing when we
 * begin direct dispatch, to avoid processing packets out of order.
 */
static void
netisr_processqueue(struct netisr *ni)
{
	struct mbuf *m;

	for (;;) {
		IF_DEQUEUE(ni->ni_queue, m);
		if (m == NULL)
			break;
		ni->ni_handler(m);
	}
}

/*
 * Call the netisr directly instead of queueing the packet, if possible.
 */
void
netisr_dispatch(int num, struct mbuf *m)
{
	struct netisr *ni;

	isrstat.isrs_count++;		/* XXX redundant */
	KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))),
	    ("bad isr %d", num));
	ni = &netisrs[num];
	if (ni->ni_queue == NULL) {
		isrstat.isrs_drop++;
		m_freem(m);
		return;
	}
	/*
	 * Do direct dispatch only for MPSAFE netisrs (and only when
	 * enabled).  Note that when a netisr is marked MPSAFE we permit
	 * multiple concurrent instances to run.  We guarantee only the
	 * order in which packets are processed for each "dispatch point"
	 * in the system (i.e. call to netisr_dispatch or netisr_queue).
	 * This ensures ordering of packets from an interface but does not
	 * guarantee ordering between multiple places in the system (e.g.
	 * IP dispatched from interfaces vs. IP queued from IPSec).
	 */
	if (netisr_direct && (ni->ni_flags & NETISR_MPSAFE)) {
		isrstat.isrs_directed++;
		/*
		 * NB: We used to drain the queue before handling the
		 * packet, but now do not.  Doing so here will not preserve
		 * ordering, so instead we fall back to guaranteeing order
		 * only from dispatch points in the system (see above).
		 */
		ni->ni_handler(m);
	} else {
		isrstat.isrs_deferred++;
		if (IF_HANDOFF(ni->ni_queue, m, NULL))
			schednetisr(num);
	}
}
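
/*
 * Illustrative sketch (not part of the original file): link-layer input code
 * hands a received packet to the protocol's netisr and then forgets about
 * it; netisr_dispatch() either runs the handler inline or queues the mbuf,
 * and frees the mbuf itself when no handler is registered.  "NETISR_FOO" is
 * a hypothetical protocol:
 *
 *	m->m_pkthdr.rcvif = ifp;
 *	netisr_dispatch(NETISR_FOO, m);
 *
 * Whether this runs inline depends on net.isr.direct and on the handler
 * having been registered with NETISR_MPSAFE.
 */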

/*
 * Same as above, but always queue.
 * This is either used in places where we are not confident that
 * direct dispatch is possible, or where queueing is required.
 * It returns 0 on success and an errno value on failure.  On failure
 * the mbuf has been freed.
 */
int
netisr_queue(int num, struct mbuf *m)
{
	struct netisr *ni;

	KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))),
	    ("bad isr %d", num));
	ni = &netisrs[num];
	if (ni->ni_queue == NULL) {
		isrstat.isrs_drop++;
		m_freem(m);
		return (ENXIO);
	}
	isrstat.isrs_queued++;
	if (!IF_HANDOFF(ni->ni_queue, m, NULL))
		return (ENOBUFS);	/* IF_HANDOFF has freed the mbuf */
	schednetisr(num);
	return (0);
}

static void
swi_net(void *dummy)
{
	struct netisr *ni;
	u_int bits;
	int i;
#ifdef DEVICE_POLLING
	const int polling = 1;
#else
	const int polling = 0;
#endif

	do {
		bits = atomic_readandclear_int(&netisr);
		if (bits == 0)
			break;
		while ((i = ffs(bits)) != 0) {
			isrstat.isrs_swi_count++;
			i--;
			bits &= ~(1 << i);
			ni = &netisrs[i];
			if (ni->ni_handler == NULL) {
				printf("swi_net: unregistered isr %d.\n", i);
				continue;
			}
			if ((ni->ni_flags & NETISR_MPSAFE) == 0) {
				mtx_lock(&Giant);
				if (ni->ni_queue == NULL)
					ni->ni_handler(NULL);
				else
					netisr_processqueue(ni);
				mtx_unlock(&Giant);
			} else {
				if (ni->ni_queue == NULL)
					ni->ni_handler(NULL);
				else
					netisr_processqueue(ni);
			}
		}
	} while (polling);
}

static void
start_netisr(void *dummy)
{

	if (swi_add(NULL, "net", swi_net, NULL, SWI_NET, INTR_MPSAFE, &net_ih))
		panic("start_netisr");
}
SYSINIT(start_netisr, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_netisr, NULL);
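
/*
 * Illustrative sketch (not part of the original file): callers of
 * netisr_queue() must not free the mbuf on failure, because on both error
 * paths above (ENXIO and ENOBUFS) the mbuf has already been freed.
 * "NETISR_FOO" is a hypothetical protocol:
 *
 *	error = netisr_queue(NETISR_FOO, m);
 *	if (error != 0)
 *		return (error);		<- mbuf already freed; no m_freem()
 */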