/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 * Copyright (c) 2017, Joyent, Inc. All rights reserved.
 */

#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/vm.h>
#include <sys/proc.h>
#include <sys/tuneable.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/sdt.h>
#include <sys/mutex.h>
#include <sys/bitmap.h>
#include <sys/atomic.h>
#include <sys/sunddi.h>
#include <sys/kobj.h>
#include <sys/disp.h>
#include <vm/seg_kmem.h>
#include <sys/zone.h>
#include <sys/netstack.h>

/*
 * The key we register with the zones framework so that it can tell us
 * about new zones; we use those notifications to create new stacks.
 */
static zone_key_t netstack_zone_key;

static int netstack_initialized = 0;

/*
 * Track the registered netstacks.
 * The global lock protects
 * - ns_reg
 * - the list starting at netstack_head and following the netstack_next
 *   pointers.
 */
static kmutex_t netstack_g_lock;

/*
 * Registry of netstacks with their create/shutdown/destroy functions.
 */
static struct netstack_registry ns_reg[NS_MAX];

/*
 * Global list of existing stacks. We use this when a new zone with
 * an exclusive IP instance is created.
 *
 * Note that in some cases a netstack_t needs to stay around after the zone
 * has gone away. This is because there might be outstanding references
 * (from TCP TIME_WAIT connections, IPsec state, etc). The netstack_t data
 * structure and all the foo_stack_t's hanging off of it will be cleaned up
 * when the last reference to it is dropped.
 * However, the same zone might be rebooted. That is handled using the
 * assumption that the zones framework picks a new zoneid each time a zone
 * is (re)booted. We assert for that condition in netstack_zone_create().
 * Thus the old netstack_t can take its time for things to time out.
 */
static netstack_t *netstack_head;

/*
 * To support kstat_create_netstack() using kstat_zone_add we need
 * to track both
 * - all zoneids that use the global/shared stack
 * - all kstats that have been added for the shared stack
 */
struct shared_zone_list {
	struct shared_zone_list *sz_next;
	zoneid_t sz_zoneid;
};

struct shared_kstat_list {
	struct shared_kstat_list *sk_next;
	kstat_t *sk_kstat;
};

static kmutex_t netstack_shared_lock;	/* protects the following two */
static struct shared_zone_list *netstack_shared_zones;
static struct shared_kstat_list *netstack_shared_kstats;

static void *netstack_zone_create(zoneid_t zoneid);
static void netstack_zone_shutdown(zoneid_t zoneid, void *arg);
static void netstack_zone_destroy(zoneid_t zoneid, void *arg);

static void netstack_shared_zone_add(zoneid_t zoneid);
static void netstack_shared_zone_remove(zoneid_t zoneid);
static void netstack_shared_kstat_add(kstat_t *ks);
static void netstack_shared_kstat_remove(kstat_t *ks);

typedef boolean_t applyfn_t(kmutex_t *, netstack_t *, int);

static void apply_all_netstacks(int, applyfn_t *);
static void apply_all_modules(netstack_t *, applyfn_t *);
static void apply_all_modules_reverse(netstack_t *, applyfn_t *);
static boolean_t netstack_apply_create(kmutex_t *, netstack_t *, int);
static boolean_t netstack_apply_shutdown(kmutex_t *, netstack_t *, int);
static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
    kmutex_t *);

static void netstack_hold_locked(netstack_t *);

static ksema_t netstack_reap_limiter;
/*
 * Hard-coded constant; although this is not tunable at runtime, exposing
 * it as an /etc/system tunable is better than nothing.
 */
uint_t netstack_outstanding_reaps = 1024;
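/*
 * For example, the limit could be raised (2048 is a hypothetical value)
 * by adding the following line to /etc/system and rebooting:
 *
 *	set netstack_outstanding_reaps = 2048
 */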

void
netstack_init(void)
{
	mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);

	sema_init(&netstack_reap_limiter, netstack_outstanding_reaps, NULL,
	    SEMA_DRIVER, NULL);

	netstack_initialized = 1;

	/*
	 * We want to be informed each time a zone is created or
	 * destroyed in the kernel, so we can maintain the
	 * stack instance information.
	 */
	zone_key_create(&netstack_zone_key, netstack_zone_create,
	    netstack_zone_shutdown, netstack_zone_destroy);
}

/*
 * Register a new module with the framework.
 * This registers interest in changes to the set of netstacks.
 * The createfn and destroyfn are required, but the shutdownfn can be
 * NULL.
 * Note that due to the current zsd implementation, when the create
 * function is called the zone isn't fully present; thus functions
 * like zone_find_by_* will fail, and hence the create function cannot
 * use many zone-related kernel functions, including zcmn_err().
 */
void
netstack_register(int moduleid,
    void *(*module_create)(netstackid_t, netstack_t *),
    void (*module_shutdown)(netstackid_t, void *),
    void (*module_destroy)(netstackid_t, void *))
{
	netstack_t *ns;

	ASSERT(netstack_initialized);
	ASSERT(moduleid >= 0 && moduleid < NS_MAX);
	ASSERT(module_create != NULL);

	/*
	 * Make instances created after this point in time run the create
	 * callback.
	 */
	mutex_enter(&netstack_g_lock);
	ASSERT(ns_reg[moduleid].nr_create == NULL);
	ASSERT(ns_reg[moduleid].nr_flags == 0);
	ns_reg[moduleid].nr_create = module_create;
	ns_reg[moduleid].nr_shutdown = module_shutdown;
	ns_reg[moduleid].nr_destroy = module_destroy;
	ns_reg[moduleid].nr_flags = NRF_REGISTERED;

	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_CREATE_NEEDED for each of those.
	 * netstacks which have been deleted will have NSS_CREATE_COMPLETED
	 * set, but check NSF_CLOSING to be sure.
	 */
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);
		if (!(ns->netstack_flags & NSF_CLOSING) &&
		    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
			nms->nms_flags |= NSS_CREATE_NEEDED;
			DTRACE_PROBE2(netstack__create__needed,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}
	mutex_exit(&netstack_g_lock);

	/*
	 * At this point in time a new instance can be created or an instance
	 * can be destroyed, or some other module can register or unregister.
	 * Make sure we either run all the create functions for this moduleid
	 * or we wait for any other creators for this moduleid.
	 */
	apply_all_netstacks(moduleid, netstack_apply_create);
}
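
/*
 * Sketch of the expected calling pattern for a client module; the module
 * id NS_FOO and the foo_stack_* functions are hypothetical.  The create
 * callback is required, the shutdown callback may be NULL, and
 * netstack_unregister() runs any outstanding shutdown/destroy callbacks
 * before returning:
 *
 *	static void *
 *	foo_stack_create(netstackid_t stackid, netstack_t *ns)
 *	{
 *		foo_stack_t *fs = kmem_zalloc(sizeof (*fs), KM_SLEEP);
 *
 *		fs->fs_stackid = stackid;
 *		return (fs);
 *	}
 *
 *	static void
 *	foo_stack_destroy(netstackid_t stackid, void *arg)
 *	{
 *		kmem_free(arg, sizeof (foo_stack_t));
 *	}
 *
 * with registration in the module's _init() and a matching
 * netstack_unregister(NS_FOO) in its _fini():
 *
 *	netstack_register(NS_FOO, foo_stack_create, NULL,
 *	    foo_stack_destroy);
 */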

void
netstack_unregister(int moduleid)
{
	netstack_t *ns;

	ASSERT(moduleid >= 0 && moduleid < NS_MAX);

	ASSERT(ns_reg[moduleid].nr_create != NULL);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);

	mutex_enter(&netstack_g_lock);
	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_SHUTDOWN_NEEDED and NSS_DESTROY_NEEDED for each of those.
	 * That ensures that when we return all the callbacks for existing
	 * instances have completed. And since we set NRF_DYING no new
	 * instances can use this module.
	 */
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		boolean_t created = B_FALSE;
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);

		/*
		 * We need to be careful here. A netstack could actually be
		 * under creation as we speak, waiting for us to let go of
		 * this lock to proceed. It may have set NSS_CREATE_NEEDED
		 * but not have gotten to the point of completing the create
		 * yet. If NSS_CREATE_NEEDED is set, we can safely just
		 * remove it here and never create the module. However, if
		 * NSS_CREATE_INPROGRESS is set, we still need to flag this
		 * module for shutdown and destruction, just as though it
		 * had reached NSS_CREATE_COMPLETED.
		 *
		 * It is safe to do that because of two different guarantees
		 * that exist in the system. The first is that before we do
		 * a create, shutdown, or destroy, we ensure that nothing
		 * else is in progress in the system for this netstack and
		 * wait for it to complete. Secondly, because the zone is
		 * being created, we know that the following call to
		 * apply_all_netstacks() will block on the zone finishing
		 * its initialization.
		 */
		if (nms->nms_flags & NSS_CREATE_NEEDED)
			nms->nms_flags &= ~NSS_CREATE_NEEDED;

		if (nms->nms_flags & NSS_CREATE_INPROGRESS ||
		    nms->nms_flags & NSS_CREATE_COMPLETED)
			created = B_TRUE;

		if (ns_reg[moduleid].nr_shutdown != NULL && created &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, moduleid);
		}
		if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) &&
		    ns_reg[moduleid].nr_destroy != NULL && created &&
		    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
			nms->nms_flags |= NSS_DESTROY_NEEDED;
			DTRACE_PROBE2(netstack__destroy__needed,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}
	/*
	 * Prevent any new netstack from calling the registered create
	 * function, while keeping the function pointers in place until the
	 * shutdown and destroy callbacks are complete.
	 */
	ns_reg[moduleid].nr_flags |= NRF_DYING;
	mutex_exit(&netstack_g_lock);

	apply_all_netstacks(moduleid, netstack_apply_shutdown);
	apply_all_netstacks(moduleid, netstack_apply_destroy);

	/*
	 * Clear the nms_flags so that we can handle this module
	 * being loaded again.
	 * Also remove the registered functions.
	 */
	mutex_enter(&netstack_g_lock);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_DYING);
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);
		if (nms->nms_flags & NSS_DESTROY_COMPLETED) {
			nms->nms_flags = 0;
			DTRACE_PROBE2(netstack__destroy__done,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}

	ns_reg[moduleid].nr_create = NULL;
	ns_reg[moduleid].nr_shutdown = NULL;
	ns_reg[moduleid].nr_destroy = NULL;
	ns_reg[moduleid].nr_flags = 0;
	mutex_exit(&netstack_g_lock);
}

/*
 * Lookup and/or allocate a netstack for this zone.
 */
static void *
netstack_zone_create(zoneid_t zoneid)
{
	netstackid_t stackid;
	netstack_t *ns;
	netstack_t **nsp;
	zone_t *zone;
	int i;

	ASSERT(netstack_initialized);

	zone = zone_find_by_id_nolock(zoneid);
	ASSERT(zone != NULL);

	if (zone->zone_flags & ZF_NET_EXCL) {
		stackid = zoneid;
	} else {
		/* Use the stack instance shared with the global zone */
		stackid = GLOBAL_NETSTACKID;
	}

	/* Allocate even if it isn't needed; simplifies locking */
	ns = (netstack_t *)kmem_zalloc(sizeof (netstack_t), KM_SLEEP);

	/* See if there is a matching stack instance */
	mutex_enter(&netstack_g_lock);
	for (nsp = &netstack_head; *nsp != NULL;
	    nsp = &((*nsp)->netstack_next)) {
		if ((*nsp)->netstack_stackid == stackid) {
			/*
			 * Should never find a pre-existing exclusive stack
			 */
			VERIFY(stackid == GLOBAL_NETSTACKID);
			kmem_free(ns, sizeof (netstack_t));
			ns = *nsp;
			mutex_enter(&ns->netstack_lock);
			ns->netstack_numzones++;
			mutex_exit(&ns->netstack_lock);
			mutex_exit(&netstack_g_lock);
			DTRACE_PROBE1(netstack__inc__numzones,
			    netstack_t *, ns);
			/* Record that we have a new shared stack zone */
			netstack_shared_zone_add(zoneid);
			zone->zone_netstack = ns;
			return (ns);
		}
	}
	/* Not found */
	mutex_init(&ns->netstack_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&ns->netstack_cv, NULL, CV_DEFAULT, NULL);
	ns->netstack_stackid = zoneid;
	ns->netstack_numzones = 1;
	ns->netstack_refcnt = 1; /* Decremented by netstack_zone_destroy */
	ns->netstack_flags = NSF_UNINIT;
	*nsp = ns;
	zone->zone_netstack = ns;

	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a CREATE running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing create callbacks to complete in moduleid order
	 */
	ns->netstack_flags |= NSF_ZONE_CREATE;

	/*
	 * Determine the set of module create functions that need to be
	 * called before we drop the lock.
	 * Set NSS_CREATE_NEEDED for each of those.
	 * Skip any with NRF_DYING set, since those are in the process of
	 * going away, by checking for flags being exactly NRF_REGISTERED.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		cv_init(&nms->nms_cv, NULL, CV_DEFAULT, NULL);

		if ((ns_reg[i].nr_flags == NRF_REGISTERED) &&
		    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
			nms->nms_flags |= NSS_CREATE_NEEDED;
			DTRACE_PROBE2(netstack__create__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	apply_all_modules(ns, netstack_apply_create);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ns->netstack_flags &= ~NSF_UNINIT;
	ASSERT(ns->netstack_flags & NSF_ZONE_CREATE);
	ns->netstack_flags &= ~NSF_ZONE_CREATE;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);

	return (ns);
}

/* ARGSUSED */
static void
netstack_zone_shutdown(zoneid_t zoneid, void *arg)
{
	netstack_t *ns = (netstack_t *)arg;
	int i;

	ASSERT(arg != NULL);

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_numzones > 0);
	if (ns->netstack_numzones != 1) {
		/* Stack instance being used by other zone */
		mutex_exit(&ns->netstack_lock);
		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
		return;
	}
	mutex_exit(&ns->netstack_lock);

	mutex_enter(&netstack_g_lock);
	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a SHUTDOWN running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing shutdown callbacks to complete in moduleid order
	 */
	ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
	ns->netstack_flags |= NSF_ZONE_SHUTDOWN;

	/*
	 * Determine the set of modules registered before we drop the lock.
	 * Set NSS_SHUTDOWN_NEEDED for each of those.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_shutdown != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	/*
	 * Call the shutdown function for all registered modules for this
	 * netstack.
	 */
	apply_all_modules_reverse(ns, netstack_apply_shutdown);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_flags & NSF_ZONE_SHUTDOWN);
	ns->netstack_flags &= ~NSF_ZONE_SHUTDOWN;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);
}

/*
 * Common routine to release a zone.
 * If this was the last zone using the stack instance then mark it as
 * closing, so that the refcnt dropping to zero will free the netstack.
 */
/* ARGSUSED */
static void
netstack_zone_destroy(zoneid_t zoneid, void *arg)
{
	netstack_t *ns = (netstack_t *)arg;

	ASSERT(arg != NULL);

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_numzones > 0);
	ns->netstack_numzones--;
	if (ns->netstack_numzones != 0) {
		/* Stack instance being used by other zone */
		mutex_exit(&ns->netstack_lock);
		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
		/* Record that a shared stack zone has gone away */
		netstack_shared_zone_remove(zoneid);
		return;
	}
	/*
	 * Set CLOSING so that netstack_find_by will not find it.
	 */
	ns->netstack_flags |= NSF_CLOSING;
	mutex_exit(&ns->netstack_lock);
	DTRACE_PROBE1(netstack__dec__numzones, netstack_t *, ns);
	/* No other thread can call zone_destroy for this stack */

	/*
	 * Decrease refcnt to account for the one set in netstack_zone_create()
	 */
	netstack_rele(ns);
}

/*
 * Called when the reference count drops to zero.
 * Call the destroy functions for each registered module.
 */
static void
netstack_stack_inactive(netstack_t *ns)
{
	int i;

	mutex_enter(&netstack_g_lock);
	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a DESTROY running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing destroy callbacks to complete in reverse moduleid order
	 */
	ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
	ns->netstack_flags |= NSF_ZONE_DESTROY;
	/*
	 * If the shutdown callback wasn't called earlier (e.g., if this is
	 * a netstack shared between multiple zones), then we schedule it now.
	 *
	 * Determine the set of modules registered before we drop the lock.
	 * Set NSS_DESTROY_NEEDED for each of those. That ensures that when
	 * we return all the callbacks for existing instances have completed.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_shutdown != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, i);
		}

		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_destroy != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
			nms->nms_flags |= NSS_DESTROY_NEEDED;
			DTRACE_PROBE2(netstack__destroy__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	/*
	 * Call the shutdown and destroy functions for all registered modules
	 * for this netstack.
	 *
	 * Since there are some ordering dependencies between the modules we
	 * tear them down in the reverse order of what was used to create them.
	 *
	 * Since a netstack_t is never reused (when a zone is rebooted it gets
	 * a new zoneid == netstackid i.e. a new netstack_t is allocated) we
	 * leave nms_flags the way it is i.e. with NSS_DESTROY_COMPLETED set.
	 * That differs from the netstack_unregister() case.
	 */
	apply_all_modules_reverse(ns, netstack_apply_shutdown);
	apply_all_modules_reverse(ns, netstack_apply_destroy);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_flags & NSF_ZONE_DESTROY);
	ns->netstack_flags &= ~NSF_ZONE_DESTROY;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);
}

/*
 * Apply a function to all netstacks for a particular moduleid.
 *
 * If there is any zone activity (due to a zone being created, shut down,
 * or destroyed) we wait for that to complete before we proceed. This ensures
 * that the moduleids are processed in order when a zone is created or
 * destroyed.
 *
 * The applyfn has to drop netstack_g_lock if it does some work.
 * In that case we don't follow netstack_next, even if it would be possible
 * to do so without any hazards. This is because we want the design to
 * allow for the list of netstacks threaded by netstack_next to change in
 * any arbitrary way during the time the lock was dropped.
 *
 * It is safe to restart the loop at netstack_head since the applyfn
 * changes netstack_m_state as it processes things, so a subsequent
 * pass through will have no effect in applyfn; hence the loop will
 * terminate after at worst O(N^2) iterations.
 */
static void
apply_all_netstacks(int moduleid, applyfn_t *applyfn)
{
	netstack_t *ns;

	mutex_enter(&netstack_g_lock);
	ns = netstack_head;
	while (ns != NULL) {
		if (wait_for_zone_creator(ns, &netstack_g_lock)) {
			/* Lock dropped - restart at head */
			ns = netstack_head;
		} else if ((applyfn)(&netstack_g_lock, ns, moduleid)) {
			/* Lock dropped - restart at head */
			ns = netstack_head;
		} else {
			ns = ns->netstack_next;
		}
	}
	mutex_exit(&netstack_g_lock);
}

/*
 * Apply a function to all moduleids for a particular netstack.
 *
 * Since the netstack linkage doesn't matter in this case we can
 * ignore whether the function drops the lock.
 */
static void
apply_all_modules(netstack_t *ns, applyfn_t *applyfn)
{
	int i;

	mutex_enter(&netstack_g_lock);
	for (i = 0; i < NS_MAX; i++) {
		/*
		 * We don't care whether the lock was dropped
		 * since we are not iterating over netstack_head.
		 */
		(void) (applyfn)(&netstack_g_lock, ns, i);
	}
	mutex_exit(&netstack_g_lock);
}

/* Like the above but in reverse moduleid order */
static void
apply_all_modules_reverse(netstack_t *ns, applyfn_t *applyfn)
{
	int i;

	mutex_enter(&netstack_g_lock);
	for (i = NS_MAX-1; i >= 0; i--) {
		/*
		 * We don't care whether the lock was dropped
		 * since we are not iterating over netstack_head.
		 */
		(void) (applyfn)(&netstack_g_lock, ns, i);
	}
	mutex_exit(&netstack_g_lock);
}

/*
 * Call the create function for the ns and moduleid if CREATE_NEEDED
 * is set.
 * If some other thread gets here first and sets *_INPROGRESS, then
 * we wait for that thread to complete so that we can ensure that
 * all the callbacks are done when we've looped over all netstacks/moduleids.
 *
 * When we call the create function, we temporarily drop the netstack_lock
 * held by the caller, and return true to tell the caller it needs to
 * re-evaluate the state.
 */
static boolean_t
netstack_apply_create(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	void *result;
	netstackid_t stackid;
	nm_state_t *nms = &ns->netstack_m_state[moduleid];
	boolean_t dropped = B_FALSE;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);

	if (wait_for_nms_inprogress(ns, nms, lockp))
		dropped = B_TRUE;

	if (nms->nms_flags & NSS_CREATE_NEEDED) {
		nms->nms_flags &= ~NSS_CREATE_NEEDED;
		nms->nms_flags |= NSS_CREATE_INPROGRESS;
		DTRACE_PROBE2(netstack__create__inprogress,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		mutex_exit(lockp);
		dropped = B_TRUE;

		ASSERT(ns_reg[moduleid].nr_create != NULL);
		stackid = ns->netstack_stackid;
		DTRACE_PROBE2(netstack__create__start,
		    netstackid_t, stackid,
		    netstack_t *, ns);
		result = (ns_reg[moduleid].nr_create)(stackid, ns);
		DTRACE_PROBE2(netstack__create__end,
		    void *, result, netstack_t *, ns);

		ASSERT(result != NULL);
		mutex_enter(lockp);
		mutex_enter(&ns->netstack_lock);
		ns->netstack_modules[moduleid] = result;
		nms->nms_flags &= ~NSS_CREATE_INPROGRESS;
		nms->nms_flags |= NSS_CREATE_COMPLETED;
		cv_broadcast(&nms->nms_cv);
		DTRACE_PROBE2(netstack__create__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	} else {
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	}
}

/*
 * Call the shutdown function for the ns and moduleid if SHUTDOWN_NEEDED
 * is set.
 * If some other thread gets here first and sets *_INPROGRESS, then
 * we wait for that thread to complete so that we can ensure that
 * all the callbacks are done when we've looped over all netstacks/moduleids.
 *
 * When we call the shutdown function, we temporarily drop the netstack_lock
 * held by the caller, and return true to tell the caller it needs to
 * re-evaluate the state.
 */
static boolean_t
netstack_apply_shutdown(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	netstackid_t stackid;
	void *netstack_module;
	nm_state_t *nms = &ns->netstack_m_state[moduleid];
	boolean_t dropped = B_FALSE;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);

	if (wait_for_nms_inprogress(ns, nms, lockp))
		dropped = B_TRUE;

	if (nms->nms_flags & NSS_SHUTDOWN_NEEDED) {
		nms->nms_flags &= ~NSS_SHUTDOWN_NEEDED;
		nms->nms_flags |= NSS_SHUTDOWN_INPROGRESS;
		DTRACE_PROBE2(netstack__shutdown__inprogress,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		mutex_exit(lockp);
		dropped = B_TRUE;

		ASSERT(ns_reg[moduleid].nr_shutdown != NULL);
		stackid = ns->netstack_stackid;
		netstack_module = ns->netstack_modules[moduleid];
		DTRACE_PROBE2(netstack__shutdown__start,
		    netstackid_t, stackid,
		    void *, netstack_module);
		(ns_reg[moduleid].nr_shutdown)(stackid, netstack_module);
		DTRACE_PROBE1(netstack__shutdown__end,
		    netstack_t *, ns);

		mutex_enter(lockp);
		mutex_enter(&ns->netstack_lock);
		nms->nms_flags &= ~NSS_SHUTDOWN_INPROGRESS;
		nms->nms_flags |= NSS_SHUTDOWN_COMPLETED;
		cv_broadcast(&nms->nms_cv);
		DTRACE_PROBE2(netstack__shutdown__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	} else {
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	}
}

/*
 * Call the destroy function for the ns and moduleid if DESTROY_NEEDED
 * is set.
 * If some other thread gets here first and sets *_INPROGRESS, then
 * we wait for that thread to complete so that we can ensure that
 * all the callbacks are done when we've looped over all netstacks/moduleids.
 *
 * When we call the destroy function, we temporarily drop the netstack_lock
 * held by the caller, and return true to tell the caller it needs to
 * re-evaluate the state.
 */
static boolean_t
netstack_apply_destroy(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	netstackid_t stackid;
	void *netstack_module;
	nm_state_t *nms = &ns->netstack_m_state[moduleid];
	boolean_t dropped = B_FALSE;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);

	if (wait_for_nms_inprogress(ns, nms, lockp))
		dropped = B_TRUE;

	if (nms->nms_flags & NSS_DESTROY_NEEDED) {
		nms->nms_flags &= ~NSS_DESTROY_NEEDED;
		nms->nms_flags |= NSS_DESTROY_INPROGRESS;
		DTRACE_PROBE2(netstack__destroy__inprogress,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		mutex_exit(lockp);
		dropped = B_TRUE;

		ASSERT(ns_reg[moduleid].nr_destroy != NULL);
		stackid = ns->netstack_stackid;
		netstack_module = ns->netstack_modules[moduleid];
		DTRACE_PROBE2(netstack__destroy__start,
		    netstackid_t, stackid,
		    void *, netstack_module);
		(ns_reg[moduleid].nr_destroy)(stackid, netstack_module);
		DTRACE_PROBE1(netstack__destroy__end,
		    netstack_t *, ns);

		mutex_enter(lockp);
		mutex_enter(&ns->netstack_lock);
		ns->netstack_modules[moduleid] = NULL;
		nms->nms_flags &= ~NSS_DESTROY_INPROGRESS;
		nms->nms_flags |= NSS_DESTROY_COMPLETED;
		cv_broadcast(&nms->nms_cv);
		DTRACE_PROBE2(netstack__destroy__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	} else {
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	}
}

/*
 * If somebody is creating the netstack (due to a new zone being created)
 * then we wait for them to complete. This ensures that any additional
 * netstack_register() doesn't cause the create functions to run out of
 * order.
 * Note that we do not need such a global wait in the case of the shutdown
 * and destroy callbacks, since in that case it is sufficient for both
 * threads to set NEEDED and wait for INPROGRESS to ensure ordering.
 * Returns true if lockp was temporarily dropped while waiting.
 */
static boolean_t
wait_for_zone_creator(netstack_t *ns, kmutex_t *lockp)
{
	boolean_t dropped = B_FALSE;

	mutex_enter(&ns->netstack_lock);
	while (ns->netstack_flags & NSF_ZONE_CREATE) {
		DTRACE_PROBE1(netstack__wait__zone__inprogress,
		    netstack_t *, ns);
		if (lockp != NULL) {
			dropped = B_TRUE;
			mutex_exit(lockp);
		}
		cv_wait(&ns->netstack_cv, &ns->netstack_lock);
		if (lockp != NULL) {
			/* First drop netstack_lock to preserve order */
			mutex_exit(&ns->netstack_lock);
			mutex_enter(lockp);
			mutex_enter(&ns->netstack_lock);
		}
	}
	mutex_exit(&ns->netstack_lock);
	return (dropped);
}

/*
 * Wait for any INPROGRESS flag to be cleared for the netstack/moduleid
 * combination.
 * Returns true if lockp was temporarily dropped while waiting.
 */
static boolean_t
wait_for_nms_inprogress(netstack_t *ns, nm_state_t *nms, kmutex_t *lockp)
{
	boolean_t dropped = B_FALSE;

	while (nms->nms_flags & NSS_ALL_INPROGRESS) {
		DTRACE_PROBE2(netstack__wait__nms__inprogress,
		    netstack_t *, ns, nm_state_t *, nms);
		if (lockp != NULL) {
			dropped = B_TRUE;
			mutex_exit(lockp);
		}
		cv_wait(&nms->nms_cv, &ns->netstack_lock);
		if (lockp != NULL) {
			/* First drop netstack_lock to preserve order */
			mutex_exit(&ns->netstack_lock);
			mutex_enter(lockp);
			mutex_enter(&ns->netstack_lock);
		}
	}
	return (dropped);
}

/*
 * Get the stack instance used in the caller's zone.
 * Increases the reference count; the caller must do a netstack_rele().
 * It can't be called after zone_destroy() has started.
 */
netstack_t *
netstack_get_current(void)
{
	netstack_t *ns;

	ns = curproc->p_zone->zone_netstack;
	ASSERT(ns != NULL);
	return (netstack_hold_if_active(ns));
}
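
/*
 * Sketch of the usual hold/rele discipline for callers (foo_do_work() and
 * NS_FOO are hypothetical); the NULL check matters because the current
 * zone's stack may already be uninitialized or closing:
 *
 *	netstack_t *ns = netstack_get_current();
 *
 *	if (ns == NULL)
 *		return (ENXIO);
 *	foo_do_work(ns->netstack_modules[NS_FOO]);
 *	netstack_rele(ns);
 */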

/*
 * Find a stack instance given the cred.
 * This is used by the modules to potentially allow for a future when
 * something other than the zoneid is used to determine the stack.
 */
netstack_t *
netstack_find_by_cred(const cred_t *cr)
{
	zoneid_t zoneid = crgetzoneid(cr);

	/* Handle the case when cr_zone is NULL */
	if (zoneid == (zoneid_t)-1)
		zoneid = GLOBAL_ZONEID;

	/* For performance ... */
	if (curproc->p_zone->zone_id == zoneid)
		return (netstack_get_current());
	else
		return (netstack_find_by_zoneid(zoneid));
}

/*
 * Find a stack instance given the zoneid.
 * Increases the reference count if found; caller must do a
 * netstack_rele().
 *
 * If there is no exact match then assume the shared stack instance
 * matches.
 *
 * Skip the uninitialized and closing ones.
 */
netstack_t *
netstack_find_by_zoneid(zoneid_t zoneid)
{
	netstack_t *ns;
	zone_t *zone;

	zone = zone_find_by_id(zoneid);

	if (zone == NULL)
		return (NULL);

	ASSERT(zone->zone_netstack != NULL);
	ns = netstack_hold_if_active(zone->zone_netstack);

	zone_rele(zone);
	return (ns);
}

/*
 * Find a stack instance given the zoneid. Can only be called from
 * the create callback. See the comments in zone_find_by_id_nolock for
 * why that limitation exists.
 *
 * Increases the reference count if found; caller must do a
 * netstack_rele().
 *
 * If there is no exact match then assume the shared stack instance
 * matches.
 *
 * Skip the uninitialized ones.
 */
netstack_t *
netstack_find_by_zoneid_nolock(zoneid_t zoneid)
{
	zone_t *zone;

	zone = zone_find_by_id_nolock(zoneid);

	if (zone == NULL)
		return (NULL);

	ASSERT(zone->zone_netstack != NULL);
	/* zone_find_by_id_nolock does not have a hold on the zone */
	return (netstack_hold_if_active(zone->zone_netstack));
}

/*
 * Find a stack instance given the stackid (exact match).
 * Increases the reference count if found; caller must do a
 * netstack_rele().
 *
 * Skip the uninitialized ones.
 */
netstack_t *
netstack_find_by_stackid(netstackid_t stackid)
{
	netstack_t *ns;

	mutex_enter(&netstack_g_lock);
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		/* Can't use hold_if_active because of stackid check. */
		mutex_enter(&ns->netstack_lock);
		if (ns->netstack_stackid == stackid &&
		    !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
			netstack_hold_locked(ns);
			mutex_exit(&ns->netstack_lock);
			mutex_exit(&netstack_g_lock);
			return (ns);
		}
		mutex_exit(&ns->netstack_lock);
	}
	mutex_exit(&netstack_g_lock);
	return (NULL);
}

boolean_t
netstack_inuse_by_stackid(netstackid_t stackid)
{
	netstack_t *ns;
	boolean_t rval = B_FALSE;

	mutex_enter(&netstack_g_lock);

	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		if (ns->netstack_stackid == stackid) {
			rval = B_TRUE;
			break;
		}
	}

	mutex_exit(&netstack_g_lock);

	return (rval);
}

static void
netstack_reap(void *arg)
{
	netstack_t **nsp, *ns = (netstack_t *)arg;
	boolean_t found;
	int i;

	/*
	 * Time to call the destroy functions and free up
	 * the structure
	 */
	netstack_stack_inactive(ns);

	/* Make sure nothing increased the references */
	ASSERT(ns->netstack_refcnt == 0);
	ASSERT(ns->netstack_numzones == 0);

	/* Finally remove from list of netstacks */
	mutex_enter(&netstack_g_lock);
	found = B_FALSE;
	for (nsp = &netstack_head; *nsp != NULL;
	    nsp = &(*nsp)->netstack_next) {
		if (*nsp == ns) {
			*nsp = ns->netstack_next;
			ns->netstack_next = NULL;
			found = B_TRUE;
			break;
		}
	}
	ASSERT(found);
	mutex_exit(&netstack_g_lock);

	/* Make sure nothing increased the references */
	ASSERT(ns->netstack_refcnt == 0);
	ASSERT(ns->netstack_numzones == 0);

	ASSERT(ns->netstack_flags & NSF_CLOSING);

	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		cv_destroy(&nms->nms_cv);
	}
	mutex_destroy(&ns->netstack_lock);
	cv_destroy(&ns->netstack_cv);
	kmem_free(ns, sizeof (*ns));
	/* Allow another reap to be scheduled. */
	sema_v(&netstack_reap_limiter);
}

void
netstack_rele(netstack_t *ns)
{
	int refcnt, numzones;

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_refcnt > 0);
	ns->netstack_refcnt--;
	/*
	 * As we drop the lock additional netstack_rele()s can come in
	 * and decrement the refcnt to zero and free the netstack_t.
	 * Store the counts in local variables and, if we were not the
	 * last, don't reference the netstack_t after that.
	 */
	refcnt = ns->netstack_refcnt;
	numzones = ns->netstack_numzones;
	DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
	mutex_exit(&ns->netstack_lock);

	if (refcnt == 0 && numzones == 0) {
		/*
		 * Because there are possibilities of re-entrancy in various
		 * netstack structures by callers, which might cause a lock up
		 * due to odd reference models, or other factors, we choose to
		 * schedule the actual deletion of this netstack as a deferred
		 * task on the system taskq. This way, any such reference
		 * models won't trip over themselves.
		 *
		 * Assume we aren't in a high-priority interrupt context, so
		 * we can use KM_SLEEP and semaphores.
		 */
		if (sema_tryp(&netstack_reap_limiter) == 0) {
			/*
			 * Indicate we're slamming against a limit.
			 */
			hrtime_t measurement = gethrtime();

			sema_p(&netstack_reap_limiter);
			/* Capture the delay in nanoseconds. */
			DTRACE_PROBE1(netstack__reap__rate__limited,
			    hrtime_t, gethrtime() - measurement);
		}

		/* TQ_SLEEP should prevent taskq_dispatch() from failing. */
		(void) taskq_dispatch(system_taskq, netstack_reap, ns,
		    TQ_SLEEP);
	}
}

static void
netstack_hold_locked(netstack_t *ns)
{
	ASSERT(MUTEX_HELD(&ns->netstack_lock));
	ns->netstack_refcnt++;
	ASSERT(ns->netstack_refcnt > 0);
	DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
}

/*
 * If the passed-in netstack isn't active (i.e. it's uninitialized or closing),
 * return NULL, otherwise return it with its reference held. Common code
 * for many netstack_find*() functions.
 */
netstack_t *
netstack_hold_if_active(netstack_t *ns)
{
	netstack_t *retval;

	mutex_enter(&ns->netstack_lock);
	if (ns->netstack_flags & (NSF_UNINIT | NSF_CLOSING)) {
		retval = NULL;
	} else {
		netstack_hold_locked(ns);
		retval = ns;
	}
	mutex_exit(&ns->netstack_lock);

	return (retval);
}

void
netstack_hold(netstack_t *ns)
{
	mutex_enter(&ns->netstack_lock);
	netstack_hold_locked(ns);
	mutex_exit(&ns->netstack_lock);
}

/*
 * To support kstat_create_netstack() using kstat_zone_add we need
 * to track both
 * - all zoneids that use the global/shared stack
 * - all kstats that have been added for the shared stack
 */
kstat_t *
kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
    char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
    netstackid_t ks_netstackid)
{
	kstat_t *ks;

	if (ks_netstackid == GLOBAL_NETSTACKID) {
		ks = kstat_create_zone(ks_module, ks_instance, ks_name,
		    ks_class, ks_type, ks_ndata, ks_flags, GLOBAL_ZONEID);
		if (ks != NULL)
			netstack_shared_kstat_add(ks);
		return (ks);
	} else {
		zoneid_t zoneid = ks_netstackid;

		return (kstat_create_zone(ks_module, ks_instance, ks_name,
		    ks_class, ks_type, ks_ndata, ks_flags, zoneid));
	}
}
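
/*
 * Sketch of the intended use from a module's create callback ("foo" and
 * fs->fs_kstat are hypothetical). Passing the netstackid rather than a
 * zoneid lets shared-stack kstats be made visible, via kstat_zone_add(),
 * in every zone that uses the shared stack:
 *
 *	fs->fs_kstat = kstat_create_netstack("foo", 0, "foostat", "net",
 *	    KSTAT_TYPE_NAMED, 1, KSTAT_FLAG_VIRTUAL, stackid);
 *
 * with the matching teardown in the destroy callback:
 *
 *	kstat_delete_netstack(fs->fs_kstat, stackid);
 */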

void
kstat_delete_netstack(kstat_t *ks, netstackid_t ks_netstackid)
{
	if (ks_netstackid == GLOBAL_NETSTACKID) {
		netstack_shared_kstat_remove(ks);
	}
	kstat_delete(ks);
}

static void
netstack_shared_zone_add(zoneid_t zoneid)
{
	struct shared_zone_list *sz;
	struct shared_kstat_list *sk;

	sz = (struct shared_zone_list *)kmem_zalloc(sizeof (*sz), KM_SLEEP);
	sz->sz_zoneid = zoneid;

	/* Insert in list */
	mutex_enter(&netstack_shared_lock);
	sz->sz_next = netstack_shared_zones;
	netstack_shared_zones = sz;

	/*
	 * Perform kstat_zone_add for each existing shared stack kstat.
	 * Note: holds netstack_shared_lock across kstat_zone_add.
	 */
	for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
		kstat_zone_add(sk->sk_kstat, zoneid);
	}
	mutex_exit(&netstack_shared_lock);
}

static void
netstack_shared_zone_remove(zoneid_t zoneid)
{
	struct shared_zone_list **szp, *sz;
	struct shared_kstat_list *sk;

	/* Find in list */
	mutex_enter(&netstack_shared_lock);
	sz = NULL;
	for (szp = &netstack_shared_zones; *szp != NULL;
	    szp = &((*szp)->sz_next)) {
		if ((*szp)->sz_zoneid == zoneid) {
			sz = *szp;
			break;
		}
	}
	/* We must find it */
	ASSERT(sz != NULL);
	*szp = sz->sz_next;
	sz->sz_next = NULL;

	/*
	 * Perform kstat_zone_remove for each existing shared stack kstat.
	 * Note: holds netstack_shared_lock across kstat_zone_remove.
	 */
	for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
		kstat_zone_remove(sk->sk_kstat, zoneid);
	}
	mutex_exit(&netstack_shared_lock);

	kmem_free(sz, sizeof (*sz));
}

static void
netstack_shared_kstat_add(kstat_t *ks)
{
	struct shared_zone_list *sz;
	struct shared_kstat_list *sk;

	sk = (struct shared_kstat_list *)kmem_zalloc(sizeof (*sk), KM_SLEEP);
	sk->sk_kstat = ks;

	/* Insert in list */
	mutex_enter(&netstack_shared_lock);
	sk->sk_next = netstack_shared_kstats;
	netstack_shared_kstats = sk;

	/*
	 * Perform kstat_zone_add for each existing shared stack zone.
	 * Note: holds netstack_shared_lock across kstat_zone_add.
	 */
	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
		kstat_zone_add(ks, sz->sz_zoneid);
	}
	mutex_exit(&netstack_shared_lock);
}

static void
netstack_shared_kstat_remove(kstat_t *ks)
{
	struct shared_zone_list *sz;
	struct shared_kstat_list **skp, *sk;

	/* Find in list */
	mutex_enter(&netstack_shared_lock);
	sk = NULL;
	for (skp = &netstack_shared_kstats; *skp != NULL;
	    skp = &((*skp)->sk_next)) {
		if ((*skp)->sk_kstat == ks) {
			sk = *skp;
			break;
		}
	}
	/* Must find it */
	ASSERT(sk != NULL);
	*skp = sk->sk_next;
	sk->sk_next = NULL;

	/*
	 * Perform kstat_zone_remove for each existing shared stack zone.
	 * Note: holds netstack_shared_lock across kstat_zone_remove.
	 */
	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
		kstat_zone_remove(ks, sz->sz_zoneid);
	}
	mutex_exit(&netstack_shared_lock);
	kmem_free(sk, sizeof (*sk));
}

/*
 * Return B_TRUE if the given zoneid uses the global/shared stack.
 */
static boolean_t
netstack_find_shared_zoneid(zoneid_t zoneid)
{
	struct shared_zone_list *sz;

	mutex_enter(&netstack_shared_lock);
	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
		if (sz->sz_zoneid == zoneid) {
			mutex_exit(&netstack_shared_lock);
			return (B_TRUE);
		}
	}
	mutex_exit(&netstack_shared_lock);
	return (B_FALSE);
}

/*
 * Hide the fact that zoneids and netstackids are allocated from
 * the same space in the current implementation.
 * We do not check that the stackid/zoneid is valid, since there is
 * no need for that; however, callers should only pass ids that are
 * valid.
 */
zoneid_t
netstackid_to_zoneid(netstackid_t stackid)
{
	return (stackid);
}

netstackid_t
zoneid_to_netstackid(zoneid_t zoneid)
{
	if (netstack_find_shared_zoneid(zoneid))
		return (GLOBAL_NETSTACKID);
	else
		return (zoneid);
}

zoneid_t
netstack_get_zoneid(netstack_t *ns)
{
	return (netstackid_to_zoneid(ns->netstack_stackid));
}

/*
 * Simplistic support for walking all the handles.
 * Example usage:
 *	netstack_handle_t nh;
 *	netstack_t *ns;
 *
 *	netstack_next_init(&nh);
 *	while ((ns = netstack_next(&nh)) != NULL) {
 *		do something;
 *		netstack_rele(ns);
 *	}
 *	netstack_next_fini(&nh);
 */
void
netstack_next_init(netstack_handle_t *handle)
{
	*handle = 0;
}

/* ARGSUSED */
void
netstack_next_fini(netstack_handle_t *handle)
{
}

netstack_t *
netstack_next(netstack_handle_t *handle)
{
	netstack_t *ns;
	int i, end;

	end = *handle;

	/* Walk the list, skipping the first *handle instances */
	mutex_enter(&netstack_g_lock);
	ns = netstack_head;
	for (i = 0; i < end; i++) {
		if (ns == NULL)
			break;
		ns = ns->netstack_next;
	}
	/*
	 * Skip those that aren't really here (uninitialized or closing).
	 * Can't use hold_if_active because of "end" tracking.
	 */
	while (ns != NULL) {
		mutex_enter(&ns->netstack_lock);
		if ((ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) == 0) {
			*handle = end + 1;
			netstack_hold_locked(ns);
			mutex_exit(&ns->netstack_lock);
			break;
		}
		mutex_exit(&ns->netstack_lock);
		end++;
		ns = ns->netstack_next;
	}
	mutex_exit(&netstack_g_lock);
	return (ns);
}