1 /*- 2 * Copyright (c) 2001,2002,2003 Jonathan Lemon <jlemon@FreeBSD.org> 3 * Copyright (c) 1997, Stefan Esser <se@freebsd.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 * $FreeBSD$ 28 */ 29 30 #include "opt_net.h" 31 32 #include <sys/param.h> 33 #include <sys/bus.h> 34 #include <sys/rtprio.h> 35 #include <sys/systm.h> 36 #include <sys/interrupt.h> 37 #include <sys/kernel.h> 38 #include <sys/kthread.h> 39 #include <sys/lock.h> 40 #include <sys/malloc.h> 41 #include <sys/proc.h> 42 #include <sys/random.h> 43 #include <sys/resourcevar.h> 44 #include <sys/sysctl.h> 45 #include <sys/unistd.h> 46 #include <machine/atomic.h> 47 #include <machine/cpu.h> 48 #include <machine/stdarg.h> 49 50 #include <sys/mbuf.h> 51 #include <sys/socket.h> 52 53 #include <net/if.h> 54 #include <net/if_types.h> 55 #include <net/if_var.h> 56 #include <net/netisr.h> 57 58 /* 59 * debug_mpsafenet controls network subsystem-wide use of the Giant lock, 60 * from system calls down to interrupt handlers. It can be changed only via 61 * a tunable at boot, not at run-time, due to the complexity of unwinding. 62 * The compiled default is set via a kernel option; right now, the default 63 * unless otherwise specified is to run the network stack without Giant. 64 */ 65 #ifdef NET_WITH_GIANT 66 int debug_mpsafenet = 0; 67 #else 68 int debug_mpsafenet = 1; 69 #endif 70 int debug_mpsafenet_toolatetotwiddle = 0; 71 72 TUNABLE_INT("debug.mpsafenet", &debug_mpsafenet); 73 SYSCTL_INT(_debug, OID_AUTO, mpsafenet, CTLFLAG_RD, &debug_mpsafenet, 0, 74 "Enable/disable MPSAFE network support"); 75 76 volatile unsigned int netisr; /* scheduling bits for network */ 77 78 struct netisr { 79 netisr_t *ni_handler; 80 struct ifqueue *ni_queue; 81 int ni_flags; 82 } netisrs[32]; 83 84 static void *net_ih; 85 86 /* 87 * Not all network code is currently capable of running MPSAFE; however, 88 * most of it is. Since those sections that are not are generally optional 89 * components not shipped with default kernels, we provide a basic way to 90 * determine whether MPSAFE operation is permitted: based on a default of 91 * yes, we permit non-MPSAFE components to use a registration call to 92 * identify that they require Giant. If the system is early in the boot 93 * process still, then we change the debug_mpsafenet setting to choose a 94 * non-MPSAFE execution mode (degraded). If it's too late for that (since 95 * the setting cannot be changed at run time), we generate a console warning 96 * that the configuration may be unsafe. 97 */ 98 static int mpsafe_warn_count; 99 100 /* 101 * Function call implementing registration of a non-MPSAFE network component. 102 */ 103 void 104 net_warn_not_mpsafe(const char *component) 105 { 106 107 /* 108 * If we're running with Giant over the network stack, there is no 109 * problem. 110 */ 111 if (!debug_mpsafenet) 112 return; 113 114 /* 115 * If it's not too late to change the MPSAFE setting for the network 116 * stack, do so now. This effectively suppresses warnings by 117 * components registering later. 118 */ 119 if (!debug_mpsafenet_toolatetotwiddle) { 120 debug_mpsafenet = 0; 121 printf("WARNING: debug.mpsafenet forced to 0 as %s requires " 122 "Giant\n", component); 123 return; 124 } 125 126 /* 127 * We must run without Giant, so generate a console warning with some 128 * information with what to do about it. The system may be operating 129 * unsafely, however. 130 */ 131 printf("WARNING: Network stack Giant-free, but %s requires Giant.\n", 132 component); 133 if (mpsafe_warn_count == 0) 134 printf(" Consider adding 'options NET_WITH_GIANT' or " 135 "setting debug.mpsafenet=0\n"); 136 mpsafe_warn_count++; 137 } 138 139 /* 140 * This sysinit is run after any pre-loaded or compiled-in components have 141 * announced that they require Giant, but before any modules loaded at 142 * run-time. 143 */ 144 static void 145 net_mpsafe_toolate(void *arg) 146 { 147 148 debug_mpsafenet_toolatetotwiddle = 1; 149 150 if (!debug_mpsafenet) 151 printf("WARNING: MPSAFE network stack disabled, expect " 152 "reduced performance.\n"); 153 } 154 155 SYSINIT(net_mpsafe_toolate, SI_SUB_SETTINGS, SI_ORDER_ANY, net_mpsafe_toolate, 156 NULL); 157 158 void 159 legacy_setsoftnet(void) 160 { 161 swi_sched(net_ih, 0); 162 } 163 164 void 165 netisr_register(int num, netisr_t *handler, struct ifqueue *inq, int flags) 166 { 167 168 KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))), 169 ("bad isr %d", num)); 170 netisrs[num].ni_handler = handler; 171 netisrs[num].ni_queue = inq; 172 if ((flags & NETISR_MPSAFE) && !debug_mpsafenet) 173 flags &= ~NETISR_MPSAFE; 174 netisrs[num].ni_flags = flags; 175 } 176 177 void 178 netisr_unregister(int num) 179 { 180 struct netisr *ni; 181 182 KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))), 183 ("bad isr %d", num)); 184 ni = &netisrs[num]; 185 ni->ni_handler = NULL; 186 if (ni->ni_queue != NULL) 187 IF_DRAIN(ni->ni_queue); 188 ni->ni_queue = NULL; 189 } 190 191 struct isrstat { 192 int isrs_count; /* dispatch count */ 193 int isrs_directed; /* ...directly dispatched */ 194 int isrs_deferred; /* ...queued instead */ 195 int isrs_queued; /* intentionally queueued */ 196 int isrs_drop; /* dropped 'cuz no handler */ 197 int isrs_swi_count; /* swi_net handlers called */ 198 }; 199 static struct isrstat isrstat; 200 201 SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr counters"); 202 203 static int netisr_enable = 0; 204 SYSCTL_INT(_net_isr, OID_AUTO, enable, CTLFLAG_RW, 205 &netisr_enable, 0, "enable direct dispatch"); 206 TUNABLE_INT("net.isr.enable", &netisr_enable); 207 208 SYSCTL_INT(_net_isr, OID_AUTO, count, CTLFLAG_RD, 209 &isrstat.isrs_count, 0, ""); 210 SYSCTL_INT(_net_isr, OID_AUTO, directed, CTLFLAG_RD, 211 &isrstat.isrs_directed, 0, ""); 212 SYSCTL_INT(_net_isr, OID_AUTO, deferred, CTLFLAG_RD, 213 &isrstat.isrs_deferred, 0, ""); 214 SYSCTL_INT(_net_isr, OID_AUTO, queued, CTLFLAG_RD, 215 &isrstat.isrs_queued, 0, ""); 216 SYSCTL_INT(_net_isr, OID_AUTO, drop, CTLFLAG_RD, 217 &isrstat.isrs_drop, 0, ""); 218 SYSCTL_INT(_net_isr, OID_AUTO, swi_count, CTLFLAG_RD, 219 &isrstat.isrs_swi_count, 0, ""); 220 221 /* 222 * Process all packets currently present in a netisr queue. Used to 223 * drain an existing set of packets waiting for processing when we 224 * begin direct dispatch, to avoid processing packets out of order. 225 */ 226 static void 227 netisr_processqueue(struct netisr *ni) 228 { 229 struct mbuf *m; 230 231 for (;;) { 232 IF_DEQUEUE(ni->ni_queue, m); 233 if (m == NULL) 234 break; 235 ni->ni_handler(m); 236 } 237 } 238 239 /* 240 * Call the netisr directly instead of queueing the packet, if possible. 241 */ 242 void 243 netisr_dispatch(int num, struct mbuf *m) 244 { 245 struct netisr *ni; 246 247 isrstat.isrs_count++; /* XXX redundant */ 248 KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))), 249 ("bad isr %d", num)); 250 ni = &netisrs[num]; 251 if (ni->ni_queue == NULL) { 252 isrstat.isrs_drop++; 253 m_freem(m); 254 return; 255 } 256 /* 257 * Do direct dispatch only for MPSAFE netisrs (and 258 * only when enabled). Note that when a netisr is 259 * marked MPSAFE we permit multiple concurrent instances 260 * to run. We guarantee only the order in which 261 * packets are processed for each "dispatch point" in 262 * the system (i.e. call to netisr_dispatch or 263 * netisr_queue). This insures ordering of packets 264 * from an interface but does not guarantee ordering 265 * between multiple places in the system (e.g. IP 266 * dispatched from interfaces vs. IP queued from IPSec). 267 */ 268 if (netisr_enable && (ni->ni_flags & NETISR_MPSAFE)) { 269 isrstat.isrs_directed++; 270 /* 271 * NB: We used to drain the queue before handling 272 * the packet but now do not. Doing so here will 273 * not preserve ordering so instead we fallback to 274 * guaranteeing order only from dispatch points 275 * in the system (see above). 276 */ 277 ni->ni_handler(m); 278 } else { 279 isrstat.isrs_deferred++; 280 if (IF_HANDOFF(ni->ni_queue, m, NULL)) 281 schednetisr(num); 282 } 283 } 284 285 /* 286 * Same as above, but always queue. 287 * This is either used in places where we are not confident that 288 * direct dispatch is possible, or where queueing is required. 289 * It returns (0) on success and ERRNO on failure. On failure the 290 * mbuf has been free'd. 291 */ 292 int 293 netisr_queue(int num, struct mbuf *m) 294 { 295 struct netisr *ni; 296 297 KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))), 298 ("bad isr %d", num)); 299 ni = &netisrs[num]; 300 if (ni->ni_queue == NULL) { 301 isrstat.isrs_drop++; 302 m_freem(m); 303 return (ENXIO); 304 } 305 isrstat.isrs_queued++; 306 if (!IF_HANDOFF(ni->ni_queue, m, NULL)) 307 return (ENOBUFS); /* IF_HANDOFF has free'd the mbuf */ 308 schednetisr(num); 309 return (0); 310 } 311 312 static void 313 swi_net(void *dummy) 314 { 315 struct netisr *ni; 316 u_int bits; 317 int i; 318 #ifdef DEVICE_POLLING 319 const int polling = 1; 320 #else 321 const int polling = 0; 322 #endif 323 324 do { 325 bits = atomic_readandclear_int(&netisr); 326 if (bits == 0) 327 break; 328 while ((i = ffs(bits)) != 0) { 329 isrstat.isrs_swi_count++; 330 i--; 331 bits &= ~(1 << i); 332 ni = &netisrs[i]; 333 if (ni->ni_handler == NULL) { 334 printf("swi_net: unregistered isr %d.\n", i); 335 continue; 336 } 337 if ((ni->ni_flags & NETISR_MPSAFE) == 0) { 338 mtx_lock(&Giant); 339 if (ni->ni_queue == NULL) 340 ni->ni_handler(NULL); 341 else 342 netisr_processqueue(ni); 343 mtx_unlock(&Giant); 344 } else { 345 if (ni->ni_queue == NULL) 346 ni->ni_handler(NULL); 347 else 348 netisr_processqueue(ni); 349 } 350 } 351 } while (polling); 352 } 353 354 static void 355 start_netisr(void *dummy) 356 { 357 358 if (swi_add(NULL, "net", swi_net, NULL, SWI_NET, INTR_MPSAFE, &net_ih)) 359 panic("start_netisr"); 360 } 361 SYSINIT(start_netisr, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_netisr, NULL) 362