1 /*- 2 * Copyright (c) 2001,2002,2003 Jonathan Lemon <jlemon@FreeBSD.org> 3 * Copyright (c) 1997, Stefan Esser <se@freebsd.org> 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 * 27 * $FreeBSD$ 28 */ 29 30 #include "opt_net.h" 31 32 #include <sys/param.h> 33 #include <sys/bus.h> 34 #include <sys/rtprio.h> 35 #include <sys/systm.h> 36 #include <sys/interrupt.h> 37 #include <sys/kernel.h> 38 #include <sys/kthread.h> 39 #include <sys/lock.h> 40 #include <sys/malloc.h> 41 #include <sys/proc.h> 42 #include <sys/random.h> 43 #include <sys/resourcevar.h> 44 #include <sys/sysctl.h> 45 #include <sys/unistd.h> 46 #include <machine/atomic.h> 47 #include <machine/cpu.h> 48 #include <machine/stdarg.h> 49 50 #include <sys/mbuf.h> 51 #include <sys/socket.h> 52 53 #include <net/if.h> 54 #include <net/if_types.h> 55 #include <net/if_var.h> 56 #include <net/netisr.h> 57 58 /* 59 * debug_mpsafenet controls network subsystem-wide use of the Giant lock, 60 * from system calls down to interrupt handlers. It can be changed only via 61 * a tunable at boot, not at run-time, due to the complexity of unwinding. 62 * The compiled default is set via a kernel option; right now, the default 63 * unless otherwise specified is to run the network stack without Giant. 64 */ 65 #ifdef NET_WITH_GIANT 66 int debug_mpsafenet = 0; 67 #else 68 int debug_mpsafenet = 1; 69 #endif 70 int debug_mpsafenet_toolatetotwiddle = 0; 71 72 TUNABLE_INT("debug.mpsafenet", &debug_mpsafenet); 73 SYSCTL_INT(_debug, OID_AUTO, mpsafenet, CTLFLAG_RD, &debug_mpsafenet, 0, 74 "Enable/disable MPSAFE network support"); 75 76 volatile unsigned int netisr; /* scheduling bits for network */ 77 78 struct netisr { 79 netisr_t *ni_handler; 80 struct ifqueue *ni_queue; 81 int ni_flags; 82 } netisrs[32]; 83 84 static void *net_ih; 85 86 /* 87 * Note all network code is currently capable of running MPSAFE; however, 88 * most of it is. Since those sections that are not are generally optional 89 * components not shipped with default kernels, we provide a basic way to 90 * determine whether MPSAFE operation is permitted: based on a default of 91 * yes, we permit non-MPSAFE components to use a registration call to 92 * identify that they require Giant. If the system is early in the boot 93 * process still, then we change the debug_mpsafenet setting to choose a 94 * non-MPSAFE execution mode (degraded). If it's too late for that (since 95 * the setting cannot be changed at run time), we generate a console warning 96 * that the configuration may be unsafe. 97 */ 98 static int mpsafe_warn_count; 99 100 /* 101 * Function call implementing registration of a non-MPSAFE network component. 102 */ 103 void 104 net_warn_not_mpsafe(const char *component) 105 { 106 107 /* 108 * If we're running with Giant over the network stack, there is no 109 * problem. 110 */ 111 if (!debug_mpsafenet) 112 return; 113 114 /* 115 * If it's not too late to change the MPSAFE setting for the network 116 * stack, do so now. This effectively suppresses warnings by 117 * components registering later. 118 */ 119 if (!debug_mpsafenet_toolatetotwiddle) { 120 debug_mpsafenet = 0; 121 printf("WARNING: debug.mpsafenet forced to 0 as %s requires " 122 "Giant\n", component); 123 return; 124 } 125 126 /* 127 * We must run without Giant, so generate a console warning with some 128 * information with what to do about it. The system may be operating 129 * unsafely, however. 130 */ 131 printf("WARNING: Network stack Giant-free, but %s requires Giant.\n", 132 component); 133 if (mpsafe_warn_count == 0) 134 printf(" Consider adding 'options NET_WITH_GIANT' or " 135 "setting debug.mpsafenet=0\n"); 136 mpsafe_warn_count++; 137 } 138 139 /* 140 * This sysinit is run after any pre-loaded or compiled-in components have 141 * announced that they require Giant, but before any modules loaded at 142 * run-time. 143 */ 144 static void 145 net_mpsafe_toolate(void *arg) 146 { 147 148 debug_mpsafenet_toolatetotwiddle = 1; 149 150 if (!debug_mpsafenet) 151 printf("WARNING: MPSAFE network stack disabled, expect " 152 "reduced performance.\n"); 153 } 154 155 SYSINIT(net_mpsafe_toolate, SI_SUB_SETTINGS, SI_ORDER_ANY, net_mpsafe_toolate, 156 NULL); 157 158 void 159 legacy_setsoftnet(void) 160 { 161 swi_sched(net_ih, 0); 162 } 163 164 void 165 netisr_register(int num, netisr_t *handler, struct ifqueue *inq, int flags) 166 { 167 168 KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))), 169 ("bad isr %d", num)); 170 netisrs[num].ni_handler = handler; 171 netisrs[num].ni_queue = inq; 172 if ((flags & NETISR_MPSAFE) && !debug_mpsafenet) 173 flags &= ~NETISR_MPSAFE; 174 netisrs[num].ni_flags = flags; 175 } 176 177 void 178 netisr_unregister(int num) 179 { 180 struct netisr *ni; 181 182 KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))), 183 ("bad isr %d", num)); 184 ni = &netisrs[num]; 185 ni->ni_handler = NULL; 186 if (ni->ni_queue != NULL) 187 IF_DRAIN(ni->ni_queue); 188 } 189 190 struct isrstat { 191 int isrs_count; /* dispatch count */ 192 int isrs_directed; /* ...directly dispatched */ 193 int isrs_deferred; /* ...queued instead */ 194 int isrs_queued; /* intentionally queueued */ 195 int isrs_drop; /* dropped 'cuz no handler */ 196 int isrs_swi_count; /* swi_net handlers called */ 197 }; 198 static struct isrstat isrstat; 199 200 SYSCTL_NODE(_net, OID_AUTO, isr, CTLFLAG_RW, 0, "netisr counters"); 201 202 static int netisr_enable = 0; 203 SYSCTL_INT(_net_isr, OID_AUTO, enable, CTLFLAG_RW, 204 &netisr_enable, 0, "enable direct dispatch"); 205 TUNABLE_INT("net.isr.enable", &netisr_enable); 206 207 SYSCTL_INT(_net_isr, OID_AUTO, count, CTLFLAG_RD, 208 &isrstat.isrs_count, 0, ""); 209 SYSCTL_INT(_net_isr, OID_AUTO, directed, CTLFLAG_RD, 210 &isrstat.isrs_directed, 0, ""); 211 SYSCTL_INT(_net_isr, OID_AUTO, deferred, CTLFLAG_RD, 212 &isrstat.isrs_deferred, 0, ""); 213 SYSCTL_INT(_net_isr, OID_AUTO, queued, CTLFLAG_RD, 214 &isrstat.isrs_queued, 0, ""); 215 SYSCTL_INT(_net_isr, OID_AUTO, drop, CTLFLAG_RD, 216 &isrstat.isrs_drop, 0, ""); 217 SYSCTL_INT(_net_isr, OID_AUTO, swi_count, CTLFLAG_RD, 218 &isrstat.isrs_swi_count, 0, ""); 219 220 /* 221 * Process all packets currently present in a netisr queue. Used to 222 * drain an existing set of packets waiting for processing when we 223 * begin direct dispatch, to avoid processing packets out of order. 224 */ 225 static void 226 netisr_processqueue(struct netisr *ni) 227 { 228 struct mbuf *m; 229 230 for (;;) { 231 IF_DEQUEUE(ni->ni_queue, m); 232 if (m == NULL) 233 break; 234 ni->ni_handler(m); 235 } 236 } 237 238 /* 239 * Call the netisr directly instead of queueing the packet, if possible. 240 */ 241 void 242 netisr_dispatch(int num, struct mbuf *m) 243 { 244 struct netisr *ni; 245 246 isrstat.isrs_count++; /* XXX redundant */ 247 KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))), 248 ("bad isr %d", num)); 249 ni = &netisrs[num]; 250 if (ni->ni_queue == NULL) { 251 isrstat.isrs_drop++; 252 m_freem(m); 253 return; 254 } 255 /* 256 * Do direct dispatch only for MPSAFE netisrs (and 257 * only when enabled). Note that when a netisr is 258 * marked MPSAFE we permit multiple concurrent instances 259 * to run. We guarantee only the order in which 260 * packets are processed for each "dispatch point" in 261 * the system (i.e. call to netisr_dispatch or 262 * netisr_queue). This insures ordering of packets 263 * from an interface but does not guarantee ordering 264 * between multiple places in the system (e.g. IP 265 * dispatched from interfaces vs. IP queued from IPSec). 266 */ 267 if (netisr_enable && (ni->ni_flags & NETISR_MPSAFE)) { 268 isrstat.isrs_directed++; 269 /* 270 * NB: We used to drain the queue before handling 271 * the packet but now do not. Doing so here will 272 * not preserve ordering so instead we fallback to 273 * guaranteeing order only from dispatch points 274 * in the system (see above). 275 */ 276 ni->ni_handler(m); 277 } else { 278 isrstat.isrs_deferred++; 279 if (IF_HANDOFF(ni->ni_queue, m, NULL)) 280 schednetisr(num); 281 } 282 } 283 284 /* 285 * Same as above, but always queue. 286 * This is either used in places where we are not confident that 287 * direct dispatch is possible, or where queueing is required. 288 * It returns (0) on success and ERRNO on failure. On failure the 289 * mbuf has been free'd. 290 */ 291 int 292 netisr_queue(int num, struct mbuf *m) 293 { 294 struct netisr *ni; 295 296 KASSERT(!(num < 0 || num >= (sizeof(netisrs)/sizeof(*netisrs))), 297 ("bad isr %d", num)); 298 ni = &netisrs[num]; 299 if (ni->ni_queue == NULL) { 300 isrstat.isrs_drop++; 301 m_freem(m); 302 return (ENXIO); 303 } 304 isrstat.isrs_queued++; 305 if (!IF_HANDOFF(ni->ni_queue, m, NULL)) 306 return (ENOBUFS); /* IF_HANDOFF has free'd the mbuf */ 307 schednetisr(num); 308 return (0); 309 } 310 311 static void 312 swi_net(void *dummy) 313 { 314 struct netisr *ni; 315 u_int bits; 316 int i; 317 #ifdef DEVICE_POLLING 318 const int polling = 1; 319 #else 320 const int polling = 0; 321 #endif 322 323 do { 324 bits = atomic_readandclear_int(&netisr); 325 if (bits == 0) 326 break; 327 while ((i = ffs(bits)) != 0) { 328 isrstat.isrs_swi_count++; 329 i--; 330 bits &= ~(1 << i); 331 ni = &netisrs[i]; 332 if (ni->ni_handler == NULL) { 333 printf("swi_net: unregistered isr %d.\n", i); 334 continue; 335 } 336 if ((ni->ni_flags & NETISR_MPSAFE) == 0) { 337 mtx_lock(&Giant); 338 if (ni->ni_queue == NULL) 339 ni->ni_handler(NULL); 340 else 341 netisr_processqueue(ni); 342 mtx_unlock(&Giant); 343 } else { 344 if (ni->ni_queue == NULL) 345 ni->ni_handler(NULL); 346 else 347 netisr_processqueue(ni); 348 } 349 } 350 } while (polling); 351 } 352 353 static void 354 start_netisr(void *dummy) 355 { 356 357 if (swi_add(NULL, "net", swi_net, NULL, SWI_NET, INTR_MPSAFE, &net_ih)) 358 panic("start_netisr"); 359 } 360 SYSINIT(start_netisr, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_netisr, NULL) 361