/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 * Copyright (c) 2013, Joyent, Inc. All rights reserved.
 */

#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/vm.h>
#include <sys/proc.h>
#include <sys/tuneable.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/sdt.h>
#include <sys/mutex.h>
#include <sys/bitmap.h>
#include <sys/atomic.h>
#include <sys/kobj.h>
#include <sys/disp.h>
#include <vm/seg_kmem.h>
#include <sys/zone.h>
#include <sys/netstack.h>

/*
 * The zone key through which the zones framework tells us about new
 * zones, which we use to create new stacks.
 */
static zone_key_t netstack_zone_key;

static int netstack_initialized = 0;

/*
 * Track the registered netstacks.
 * The global lock protects
 * - ns_reg
 * - the list starting at netstack_head and following the netstack_next
 *   pointers.
 */
static kmutex_t netstack_g_lock;

/*
 * Registry of netstacks with their create/shutdown/destroy functions.
 */
static struct netstack_registry ns_reg[NS_MAX];

/*
 * Global list of existing stacks. We use this when a new zone with
 * an exclusive IP instance is created.
 *
 * Note that in some cases a netstack_t needs to stay around after the zone
 * has gone away. This is because there might be outstanding references
 * (from TCP TIME_WAIT connections, IPsec state, etc). The netstack_t data
 * structure and all the foo_stack_t's hanging off of it will be cleaned up
 * when the last reference to it is dropped.
 * However, the same zone might be rebooted. That is handled using the
 * assumption that the zones framework picks a new zoneid each time a zone
 * is (re)booted. We assert for that condition in netstack_zone_create().
 * Thus the old netstack_t can take its time for things to time out.
 */
static netstack_t *netstack_head;

/*
 * To support kstat_create_netstack() using kstat_zone_add we need
 * to track both
 * - all zoneids that use the global/shared stack
 * - all kstats that have been added for the shared stack
 */
struct shared_zone_list {
	struct shared_zone_list *sz_next;
	zoneid_t sz_zoneid;
};

struct shared_kstat_list {
	struct shared_kstat_list *sk_next;
	kstat_t *sk_kstat;
};

static kmutex_t netstack_shared_lock;	/* protects the following two */
static struct shared_zone_list *netstack_shared_zones;
static struct shared_kstat_list *netstack_shared_kstats;

static void *netstack_zone_create(zoneid_t zoneid);
static void netstack_zone_shutdown(zoneid_t zoneid, void *arg);
static void netstack_zone_destroy(zoneid_t zoneid, void *arg);

static void netstack_shared_zone_add(zoneid_t zoneid);
static void netstack_shared_zone_remove(zoneid_t zoneid);
static void netstack_shared_kstat_add(kstat_t *ks);
static void netstack_shared_kstat_remove(kstat_t *ks);

typedef boolean_t applyfn_t(kmutex_t *, netstack_t *, int);

static void apply_all_netstacks(int, applyfn_t *);
static void apply_all_modules(netstack_t *, applyfn_t *);
static void apply_all_modules_reverse(netstack_t *, applyfn_t *);
static boolean_t netstack_apply_create(kmutex_t *, netstack_t *, int);
static boolean_t netstack_apply_shutdown(kmutex_t *, netstack_t *, int);
static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
    kmutex_t *);

void
netstack_init(void)
{
	mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);

	netstack_initialized = 1;

	/*
	 * We want to be informed each time a zone is created or
	 * destroyed in the kernel, so we can maintain the
	 * stack instance information.
	 */
	zone_key_create(&netstack_zone_key, netstack_zone_create,
	    netstack_zone_shutdown, netstack_zone_destroy);
}

/*
 * Register a new module with the framework.
 * This registers interest in changes to the set of netstacks.
 * The createfn and destroyfn are required, but the shutdownfn can be
 * NULL.
 * Note that due to the current zsd implementation, when the create
 * function is called the zone isn't fully present; functions like
 * zone_find_by_* will fail, hence the create function cannot use many
 * zones kernel functions including zcmn_err().
 */
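/*
 * Example registration (a sketch: module "foo" and moduleid NS_FOO are
 * hypothetical; real moduleids are the NS_* constants in netstack.h):
 *
 *	netstack_register(NS_FOO, foo_stack_init, foo_stack_shutdown,
 *	    foo_stack_fini);
 *
 * foo_stack_init() returns the per-stack foo_stack_t, which is later
 * passed back to foo_stack_shutdown() and foo_stack_fini().
 */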
void
netstack_register(int moduleid,
    void *(*module_create)(netstackid_t, netstack_t *),
    void (*module_shutdown)(netstackid_t, void *),
    void (*module_destroy)(netstackid_t, void *))
{
	netstack_t *ns;

	ASSERT(netstack_initialized);
	ASSERT(moduleid >= 0 && moduleid < NS_MAX);
	ASSERT(module_create != NULL);

	/*
	 * Make instances created after this point in time run the create
	 * callback.
	 */
	mutex_enter(&netstack_g_lock);
	ASSERT(ns_reg[moduleid].nr_create == NULL);
	ASSERT(ns_reg[moduleid].nr_flags == 0);
	ns_reg[moduleid].nr_create = module_create;
	ns_reg[moduleid].nr_shutdown = module_shutdown;
	ns_reg[moduleid].nr_destroy = module_destroy;
	ns_reg[moduleid].nr_flags = NRF_REGISTERED;

	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_CREATE_NEEDED for each of those.
	 * netstacks which have been deleted will have NSS_CREATE_COMPLETED
	 * set, but check NSF_CLOSING to be sure.
	 */
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);
		if (!(ns->netstack_flags & NSF_CLOSING) &&
		    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
			nms->nms_flags |= NSS_CREATE_NEEDED;
			DTRACE_PROBE2(netstack__create__needed,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}
	mutex_exit(&netstack_g_lock);

	/*
	 * At this point in time a new instance can be created or an instance
	 * can be destroyed, or some other module can register or unregister.
	 * Make sure we either run all the create functions for this moduleid
	 * or we wait for any other creators for this moduleid.
	 */
	apply_all_netstacks(moduleid, netstack_apply_create);
}

void
netstack_unregister(int moduleid)
{
	netstack_t *ns;

	ASSERT(moduleid >= 0 && moduleid < NS_MAX);

	ASSERT(ns_reg[moduleid].nr_create != NULL);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);

	mutex_enter(&netstack_g_lock);
	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_SHUTDOWN_NEEDED and NSS_DESTROY_NEEDED for each of those.
	 * That ensures that when we return all the callbacks for existing
	 * instances have completed. And since we set NRF_DYING no new
	 * instances can use this module.
	 */
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		boolean_t created = B_FALSE;
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);

		/*
		 * We need to be careful here. We could actually have a
		 * netstack being created as we speak, waiting for us to let
		 * go of this lock to proceed. It may have set
		 * NSS_CREATE_NEEDED, but not have gotten to the point of
		 * completing it yet. If NSS_CREATE_NEEDED, we can safely
		 * just remove it here and never create the module. However,
		 * if NSS_CREATE_INPROGRESS is set, we need to still flag this
		 * module for shutdown and deletion, just as though it had
		 * reached NSS_CREATE_COMPLETED.
		 *
		 * It is safe to do that because of two different guarantees
		 * that exist in the system. The first is that before we do a
		 * create, shutdown, or destroy, we ensure that nothing else is
		 * in progress in the system for this netstack and wait for it
		 * to complete. Secondly, because the zone is being created, we
		 * know that the following call to apply_all_netstacks() will
		 * block
		 * on the zone finishing its initialization.
		 */
		if (nms->nms_flags & NSS_CREATE_NEEDED)
			nms->nms_flags &= ~NSS_CREATE_NEEDED;

		if (nms->nms_flags & NSS_CREATE_INPROGRESS ||
		    nms->nms_flags & NSS_CREATE_COMPLETED)
			created = B_TRUE;

		if (ns_reg[moduleid].nr_shutdown != NULL && created &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, moduleid);
		}
		if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) &&
		    ns_reg[moduleid].nr_destroy != NULL && created &&
		    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
			nms->nms_flags |= NSS_DESTROY_NEEDED;
			DTRACE_PROBE2(netstack__destroy__needed,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}
	/*
	 * Prevent any new netstack from calling the registered create
	 * function, while keeping the function pointers in place until the
	 * shutdown and destroy callbacks are complete.
	 */
	ns_reg[moduleid].nr_flags |= NRF_DYING;
	mutex_exit(&netstack_g_lock);

	apply_all_netstacks(moduleid, netstack_apply_shutdown);
	apply_all_netstacks(moduleid, netstack_apply_destroy);

	/*
	 * Clear the nms_flags so that we can handle this module
	 * being loaded again.
	 * Also remove the registered functions.
	 */
	mutex_enter(&netstack_g_lock);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_DYING);
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);
		if (nms->nms_flags & NSS_DESTROY_COMPLETED) {
			nms->nms_flags = 0;
			DTRACE_PROBE2(netstack__destroy__done,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}

	ns_reg[moduleid].nr_create = NULL;
	ns_reg[moduleid].nr_shutdown = NULL;
	ns_reg[moduleid].nr_destroy = NULL;
	ns_reg[moduleid].nr_flags = 0;
	mutex_exit(&netstack_g_lock);
}
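
/*
 * A sketch of the usual pairing in a module's _fini() entry point
 * (module "foo" and NS_FOO are hypothetical, not taken from an
 * existing consumer):
 *
 *	int
 *	_fini(void)
 *	{
 *		int error;
 *
 *		if ((error = mod_remove(&modlinkage)) == 0)
 *			netstack_unregister(NS_FOO);
 *		return (error);
 *	}
 *
 * netstack_unregister() runs the shutdown and destroy callbacks for
 * every extant stack instance, so it should only be called once the
 * module can no longer be entered.
 */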

/*
 * Lookup and/or allocate a netstack for this zone.
 */
static void *
netstack_zone_create(zoneid_t zoneid)
{
	netstackid_t stackid;
	netstack_t *ns;
	netstack_t **nsp;
	zone_t *zone;
	int i;

	ASSERT(netstack_initialized);

	zone = zone_find_by_id_nolock(zoneid);
	ASSERT(zone != NULL);

	if (zone->zone_flags & ZF_NET_EXCL) {
		stackid = zoneid;
	} else {
		/* Use the shared stack instance tied to the global zone */
		stackid = GLOBAL_NETSTACKID;
	}

	/* Allocate even if it isn't needed; simplifies locking */
	ns = (netstack_t *)kmem_zalloc(sizeof (netstack_t), KM_SLEEP);

	/* Check whether a matching stack instance already exists */
	mutex_enter(&netstack_g_lock);
	for (nsp = &netstack_head; *nsp != NULL;
	    nsp = &((*nsp)->netstack_next)) {
		if ((*nsp)->netstack_stackid == stackid) {
			/*
			 * Should never find a pre-existing exclusive stack
			 */
			ASSERT(stackid == GLOBAL_NETSTACKID);
			kmem_free(ns, sizeof (netstack_t));
			ns = *nsp;
			mutex_enter(&ns->netstack_lock);
			ns->netstack_numzones++;
			mutex_exit(&ns->netstack_lock);
			mutex_exit(&netstack_g_lock);
			DTRACE_PROBE1(netstack__inc__numzones,
			    netstack_t *, ns);
			/* Record that we have a new shared stack zone */
			netstack_shared_zone_add(zoneid);
			zone->zone_netstack = ns;
			return (ns);
		}
	}
	/* Not found */
	mutex_init(&ns->netstack_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&ns->netstack_cv, NULL, CV_DEFAULT, NULL);
	ns->netstack_stackid = zoneid;
	ns->netstack_numzones = 1;
	ns->netstack_refcnt = 1; /* Decremented by netstack_zone_destroy */
	ns->netstack_flags = NSF_UNINIT;
	*nsp = ns;
	zone->zone_netstack = ns;

	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a CREATE running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing create callbacks to complete in moduleid order
	 */
	ns->netstack_flags |= NSF_ZONE_CREATE;

	/*
	 * Determine the set of module create functions that need to be
	 * called before we drop the lock.
	 * Set NSS_CREATE_NEEDED for each of those.
	 * Skip any with NRF_DYING set, since those are in the process of
	 * going away, by checking for flags being exactly NRF_REGISTERED.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		cv_init(&nms->nms_cv, NULL, CV_DEFAULT, NULL);

		if ((ns_reg[i].nr_flags == NRF_REGISTERED) &&
		    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
			nms->nms_flags |= NSS_CREATE_NEEDED;
			DTRACE_PROBE2(netstack__create__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	apply_all_modules(ns, netstack_apply_create);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ns->netstack_flags &= ~NSF_UNINIT;
	ASSERT(ns->netstack_flags & NSF_ZONE_CREATE);
	ns->netstack_flags &= ~NSF_ZONE_CREATE;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);

	return (ns);
}

/* ARGSUSED */
static void
netstack_zone_shutdown(zoneid_t zoneid, void *arg)
{
	netstack_t *ns = (netstack_t *)arg;
	int i;

	ASSERT(arg != NULL);

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_numzones > 0);
	if (ns->netstack_numzones != 1) {
		/* Stack instance being used by other zone */
		mutex_exit(&ns->netstack_lock);
		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
		return;
	}
	mutex_exit(&ns->netstack_lock);

	mutex_enter(&netstack_g_lock);
	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a SHUTDOWN running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing shutdown callbacks to complete in moduleid order
	 */
	ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
	ns->netstack_flags |= NSF_ZONE_SHUTDOWN;

	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_SHUTDOWN_NEEDED for each of those.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_shutdown != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	/*
	 * Call the shutdown function for all registered modules for this
	 * netstack.
	 */
	apply_all_modules_reverse(ns, netstack_apply_shutdown);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_flags & NSF_ZONE_SHUTDOWN);
	ns->netstack_flags &= ~NSF_ZONE_SHUTDOWN;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);
}

/*
 * Common routine to release a zone.
 * If this was the last zone using the stack instance then prepare to
 * have the refcnt dropping to zero free the zone.
 */
/* ARGSUSED */
static void
netstack_zone_destroy(zoneid_t zoneid, void *arg)
{
	netstack_t *ns = (netstack_t *)arg;

	ASSERT(arg != NULL);

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_numzones > 0);
	ns->netstack_numzones--;
	if (ns->netstack_numzones != 0) {
		/* Stack instance being used by other zone */
		mutex_exit(&ns->netstack_lock);
		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
		/* Record that a shared stack zone has gone away */
		netstack_shared_zone_remove(zoneid);
		return;
	}
	/*
	 * Set CLOSING so that netstack_find_by will not find it.
	 */
	ns->netstack_flags |= NSF_CLOSING;
	mutex_exit(&ns->netstack_lock);
	DTRACE_PROBE1(netstack__dec__numzones, netstack_t *, ns);
	/* No other thread can call zone_destroy for this stack */

	/*
	 * Decrease refcnt to account for the one in netstack_zone_create()
	 */
	netstack_rele(ns);
}

/*
 * Called when the reference count drops to zero.
 * Call the destroy functions for each registered module.
 */
static void
netstack_stack_inactive(netstack_t *ns)
{
	int i;

	mutex_enter(&netstack_g_lock);
	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a DESTROY running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing destroy callbacks to complete in reverse moduleid order
	 */
	ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
	ns->netstack_flags |= NSF_ZONE_DESTROY;
	/*
	 * If the shutdown callback wasn't called earlier (e.g., if this is
	 * a netstack shared between multiple zones), then we schedule it now.
	 *
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_DESTROY_NEEDED for each of those. That
	 * ensures that when we return all the callbacks for existing
	 * instances have completed.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_shutdown != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, i);
		}

		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_destroy != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
			nms->nms_flags |= NSS_DESTROY_NEEDED;
			DTRACE_PROBE2(netstack__destroy__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	/*
	 * Call the shutdown and destroy functions for all registered modules
	 * for this netstack.
	 *
	 * Since there are some ordering dependencies between the modules we
	 * tear them down in the reverse order of what was used to create them.
	 *
	 * Since a netstack_t is never reused (when a zone is rebooted it gets
	 * a new zoneid == netstackid i.e. a new netstack_t is allocated) we
	 * leave nms_flags the way it is i.e. with NSS_DESTROY_COMPLETED set.
	 * That is different than in the netstack_unregister() case.
	 */
	apply_all_modules_reverse(ns, netstack_apply_shutdown);
	apply_all_modules_reverse(ns, netstack_apply_destroy);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_flags & NSF_ZONE_DESTROY);
	ns->netstack_flags &= ~NSF_ZONE_DESTROY;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);
}

/*
 * Apply a function to all netstacks for a particular moduleid.
 *
 * If there is any zone activity (due to a zone being created, shutdown,
 * or destroyed) we wait for that to complete before we proceed. This ensures
 * that the moduleids are processed in order when a zone is created or
 * destroyed.
 *
 * The applyfn has to drop netstack_g_lock if it does some work.
 * In that case we don't follow netstack_next,
 * even if it is possible to do so without any hazards. This is
 * because we want the design to allow for the list of netstacks threaded
 * by netstack_next to change in any arbitrary way during the time the
 * lock was dropped.
 *
 * It is safe to restart the loop at netstack_head since the applyfn
 * changes netstack_m_state as it processes things, so a subsequent
 * pass through will have no effect in applyfn; hence the loop terminates
 * in at worst O(N^2) iterations.
 */
static void
apply_all_netstacks(int moduleid, applyfn_t *applyfn)
{
	netstack_t *ns;

	mutex_enter(&netstack_g_lock);
	ns = netstack_head;
	while (ns != NULL) {
		if (wait_for_zone_creator(ns, &netstack_g_lock)) {
			/* Lock dropped - restart at head */
			ns = netstack_head;
		} else if ((applyfn)(&netstack_g_lock, ns, moduleid)) {
			/* Lock dropped - restart at head */
			ns = netstack_head;
		} else {
			ns = ns->netstack_next;
		}
	}
	mutex_exit(&netstack_g_lock);
}

/*
 * Apply a function to all moduleids for a particular netstack.
 *
 * Since the netstack linkage doesn't matter in this case we can
 * ignore whether the function drops the lock.
 */
static void
apply_all_modules(netstack_t *ns, applyfn_t *applyfn)
{
	int i;

	mutex_enter(&netstack_g_lock);
	for (i = 0; i < NS_MAX; i++) {
		/*
		 * We don't care whether the lock was dropped
		 * since we are not iterating over netstack_head.
		 */
		(void) (applyfn)(&netstack_g_lock, ns, i);
	}
	mutex_exit(&netstack_g_lock);
}

/* Like the above but in reverse moduleid order */
static void
apply_all_modules_reverse(netstack_t *ns, applyfn_t *applyfn)
{
	int i;

	mutex_enter(&netstack_g_lock);
	for (i = NS_MAX - 1; i >= 0; i--) {
		/*
		 * We don't care whether the lock was dropped
		 * since we are not iterating over netstack_head.
		 */
		(void) (applyfn)(&netstack_g_lock, ns, i);
	}
	mutex_exit(&netstack_g_lock);
}

/*
 * Call the create function for the ns and moduleid if CREATE_NEEDED
 * is set.
 * If some other thread gets here first and sets *_INPROGRESS, then
 * we wait for that thread to complete so that we can ensure that
 * all the callbacks are done when we've looped over all netstacks/moduleids.
 *
 * When we call the create function, we temporarily drop the netstack_lock
 * held by the caller, and return true to tell the caller it needs to
 * re-evaluate the state.
 */
static boolean_t
netstack_apply_create(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	void *result;
	netstackid_t stackid;
	nm_state_t *nms = &ns->netstack_m_state[moduleid];
	boolean_t dropped = B_FALSE;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);

	if (wait_for_nms_inprogress(ns, nms, lockp))
		dropped = B_TRUE;

	if (nms->nms_flags & NSS_CREATE_NEEDED) {
		nms->nms_flags &= ~NSS_CREATE_NEEDED;
		nms->nms_flags |= NSS_CREATE_INPROGRESS;
		DTRACE_PROBE2(netstack__create__inprogress,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		mutex_exit(lockp);
		dropped = B_TRUE;

		ASSERT(ns_reg[moduleid].nr_create != NULL);
		stackid = ns->netstack_stackid;
		DTRACE_PROBE2(netstack__create__start,
		    netstackid_t, stackid,
		    netstack_t *, ns);
		result = (ns_reg[moduleid].nr_create)(stackid, ns);
		DTRACE_PROBE2(netstack__create__end,
		    void *, result, netstack_t *, ns);

		ASSERT(result != NULL);
		mutex_enter(lockp);
		mutex_enter(&ns->netstack_lock);
		ns->netstack_modules[moduleid] = result;
		nms->nms_flags &= ~NSS_CREATE_INPROGRESS;
		nms->nms_flags |= NSS_CREATE_COMPLETED;
		cv_broadcast(&nms->nms_cv);
		DTRACE_PROBE2(netstack__create__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	} else {
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	}
}

/*
 * Call the shutdown function for the ns and moduleid if SHUTDOWN_NEEDED
 * is set.
 * If some other thread gets here first and sets *_INPROGRESS, then
 * we wait for that thread to complete so that we can ensure that
 * all the callbacks are done when we've looped over all netstacks/moduleids.
 *
 * When we call the shutdown function, we temporarily drop the netstack_lock
 * held by the caller, and return true to tell the caller it needs to
 * re-evaluate the state.
 */
static boolean_t
netstack_apply_shutdown(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	netstackid_t stackid;
	void *netstack_module;
	nm_state_t *nms = &ns->netstack_m_state[moduleid];
	boolean_t dropped = B_FALSE;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);

	if (wait_for_nms_inprogress(ns, nms, lockp))
		dropped = B_TRUE;

	if (nms->nms_flags & NSS_SHUTDOWN_NEEDED) {
		nms->nms_flags &= ~NSS_SHUTDOWN_NEEDED;
		nms->nms_flags |= NSS_SHUTDOWN_INPROGRESS;
		DTRACE_PROBE2(netstack__shutdown__inprogress,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		mutex_exit(lockp);
		dropped = B_TRUE;

		ASSERT(ns_reg[moduleid].nr_shutdown != NULL);
		stackid = ns->netstack_stackid;
		netstack_module = ns->netstack_modules[moduleid];
		DTRACE_PROBE2(netstack__shutdown__start,
		    netstackid_t, stackid,
		    void *, netstack_module);
		(ns_reg[moduleid].nr_shutdown)(stackid, netstack_module);
		DTRACE_PROBE1(netstack__shutdown__end,
		    netstack_t *, ns);

		mutex_enter(lockp);
		mutex_enter(&ns->netstack_lock);
		nms->nms_flags &= ~NSS_SHUTDOWN_INPROGRESS;
		nms->nms_flags |= NSS_SHUTDOWN_COMPLETED;
		cv_broadcast(&nms->nms_cv);
		DTRACE_PROBE2(netstack__shutdown__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	} else {
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	}
}

/*
 * Call the destroy function for the ns and moduleid if DESTROY_NEEDED
 * is set.
 * If some other thread gets here first and sets *_INPROGRESS, then
 * we wait for that thread to complete so that we can ensure that
 * all the callbacks are done when we've looped over all netstacks/moduleids.
 *
 * When we call the destroy function, we temporarily drop the netstack_lock
 * held by the caller, and return true to tell the caller it needs to
 * re-evaluate the state.
 */
static boolean_t
netstack_apply_destroy(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	netstackid_t stackid;
	void *netstack_module;
	nm_state_t *nms = &ns->netstack_m_state[moduleid];
	boolean_t dropped = B_FALSE;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);

	if (wait_for_nms_inprogress(ns, nms, lockp))
		dropped = B_TRUE;

	if (nms->nms_flags & NSS_DESTROY_NEEDED) {
		nms->nms_flags &= ~NSS_DESTROY_NEEDED;
		nms->nms_flags |= NSS_DESTROY_INPROGRESS;
		DTRACE_PROBE2(netstack__destroy__inprogress,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		mutex_exit(lockp);
		dropped = B_TRUE;

		ASSERT(ns_reg[moduleid].nr_destroy != NULL);
		stackid = ns->netstack_stackid;
		netstack_module = ns->netstack_modules[moduleid];
		DTRACE_PROBE2(netstack__destroy__start,
		    netstackid_t, stackid,
		    void *, netstack_module);
		(ns_reg[moduleid].nr_destroy)(stackid, netstack_module);
		DTRACE_PROBE1(netstack__destroy__end,
		    netstack_t *, ns);

		mutex_enter(lockp);
		mutex_enter(&ns->netstack_lock);
		ns->netstack_modules[moduleid] = NULL;
		nms->nms_flags &= ~NSS_DESTROY_INPROGRESS;
		nms->nms_flags |= NSS_DESTROY_COMPLETED;
		cv_broadcast(&nms->nms_cv);
		DTRACE_PROBE2(netstack__destroy__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	} else {
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	}
}

/*
 * If somebody is creating the netstack (due to a new zone being created)
 * then we wait for them to complete. This ensures that any additional
 * netstack_register() doesn't cause the create functions to run out of
 * order.
 * Note that we do not need such a global wait in the case of the shutdown
 * and destroy callbacks, since in that case it is sufficient for both
 * threads to set NEEDED and wait for INPROGRESS to ensure ordering.
 * Returns true if lockp was temporarily dropped while waiting.
 */
static boolean_t
wait_for_zone_creator(netstack_t *ns, kmutex_t *lockp)
{
	boolean_t dropped = B_FALSE;

	mutex_enter(&ns->netstack_lock);
	while (ns->netstack_flags & NSF_ZONE_CREATE) {
		DTRACE_PROBE1(netstack__wait__zone__inprogress,
		    netstack_t *, ns);
		if (lockp != NULL) {
			dropped = B_TRUE;
			mutex_exit(lockp);
		}
		cv_wait(&ns->netstack_cv, &ns->netstack_lock);
		if (lockp != NULL) {
			/* First drop netstack_lock to preserve order */
			mutex_exit(&ns->netstack_lock);
			mutex_enter(lockp);
			mutex_enter(&ns->netstack_lock);
		}
	}
	mutex_exit(&ns->netstack_lock);
	return (dropped);
}

/*
 * Wait for any INPROGRESS flag to be cleared for the netstack/moduleid
 * combination.
 * Returns true if lockp was temporarily dropped while waiting.
 */
static boolean_t
wait_for_nms_inprogress(netstack_t *ns, nm_state_t *nms, kmutex_t *lockp)
{
	boolean_t dropped = B_FALSE;

	while (nms->nms_flags & NSS_ALL_INPROGRESS) {
		DTRACE_PROBE2(netstack__wait__nms__inprogress,
		    netstack_t *, ns, nm_state_t *, nms);
		if (lockp != NULL) {
			dropped = B_TRUE;
			mutex_exit(lockp);
		}
		cv_wait(&nms->nms_cv, &ns->netstack_lock);
		if (lockp != NULL) {
			/* First drop netstack_lock to preserve order */
			mutex_exit(&ns->netstack_lock);
			mutex_enter(lockp);
			mutex_enter(&ns->netstack_lock);
		}
	}
	return (dropped);
}

/*
 * Get the stack instance used in caller's zone.
 * Increases the reference count, caller must do a netstack_rele.
 * It can't be called after zone_destroy() has started.
 */
netstack_t *
netstack_get_current(void)
{
	netstack_t *ns;

	ns = curproc->p_zone->zone_netstack;
	ASSERT(ns != NULL);
	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
		return (NULL);

	netstack_hold(ns);

	return (ns);
}
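
/*
 * Typical caller pattern (a sketch; the tcp example assumes the
 * netstack_tcp convenience member from netstack.h):
 *
 *	netstack_t *ns = netstack_get_current();
 *	if (ns != NULL) {
 *		tcp_stack_t *tcps = ns->netstack_tcp;
 *		... use tcps ...
 *		netstack_rele(ns);
 *	}
 *
 * The NULL check matters: the current zone's stack instance may
 * already be uninitialized or closing.
 */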

/*
 * Find a stack instance given the cred.
 * This is used by the modules to potentially allow for a future when
 * something other than the zoneid is used to determine the stack.
 */
netstack_t *
netstack_find_by_cred(const cred_t *cr)
{
	zoneid_t zoneid = crgetzoneid(cr);

	/* Handle the case when cr_zone is NULL */
	if (zoneid == (zoneid_t)-1)
		zoneid = GLOBAL_ZONEID;

	/* For performance ... */
	if (curproc->p_zone->zone_id == zoneid)
		return (netstack_get_current());
	else
		return (netstack_find_by_zoneid(zoneid));
}

/*
 * Find a stack instance given the zoneid.
 * Increases the reference count if found; caller must do a
 * netstack_rele().
 *
 * If there is no exact match then assume the shared stack instance
 * matches.
 *
 * Skip the uninitialized ones.
 */
netstack_t *
netstack_find_by_zoneid(zoneid_t zoneid)
{
	netstack_t *ns;
	zone_t *zone;

	zone = zone_find_by_id(zoneid);

	if (zone == NULL)
		return (NULL);

	ns = zone->zone_netstack;
	ASSERT(ns != NULL);
	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
		ns = NULL;
	else
		netstack_hold(ns);

	zone_rele(zone);
	return (ns);
}

/*
 * Find a stack instance given the zoneid. Can only be called from
 * the create callback. See the comments in zone_find_by_id_nolock why
 * that limitation exists.
 *
 * Increases the reference count if found; caller must do a
 * netstack_rele().
 *
 * If there is no exact match then assume the shared stack instance
 * matches.
 *
 * Skip the uninitialized ones.
 */
netstack_t *
netstack_find_by_zoneid_nolock(zoneid_t zoneid)
{
	netstack_t *ns;
	zone_t *zone;

	zone = zone_find_by_id_nolock(zoneid);

	if (zone == NULL)
		return (NULL);

	ns = zone->zone_netstack;
	ASSERT(ns != NULL);

	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
		ns = NULL;
	else
		netstack_hold(ns);

	/* zone_find_by_id_nolock does not have a hold on the zone */
	return (ns);
}

/*
 * Find a stack instance given the stackid (exact match only).
 * Increases the reference count if found; caller must do a
 * netstack_rele().
 *
 * Skip the uninitialized ones.
 */
netstack_t *
netstack_find_by_stackid(netstackid_t stackid)
{
	netstack_t *ns;

	mutex_enter(&netstack_g_lock);
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		mutex_enter(&ns->netstack_lock);
		if (ns->netstack_stackid == stackid &&
		    !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
			mutex_exit(&ns->netstack_lock);
			netstack_hold(ns);
			mutex_exit(&netstack_g_lock);
			return (ns);
		}
		mutex_exit(&ns->netstack_lock);
	}
	mutex_exit(&netstack_g_lock);
	return (NULL);
}

void
netstack_rele(netstack_t *ns)
{
	netstack_t **nsp;
	boolean_t found;
	int refcnt, numzones;
	int i;

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_refcnt > 0);
	ns->netstack_refcnt--;
	/*
	 * As we drop the lock additional netstack_rele()s can come in
	 * and decrement the refcnt to zero and free the netstack_t.
	 * Store pointers in local variables and if we were not the last
	 * then don't reference the netstack_t after that.
	 */
	refcnt = ns->netstack_refcnt;
	numzones = ns->netstack_numzones;
	DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
	mutex_exit(&ns->netstack_lock);

	if (refcnt == 0 && numzones == 0) {
		/*
		 * Time to call the destroy functions and free up
		 * the structure
		 */
		netstack_stack_inactive(ns);

		/* Make sure nothing increased the references */
		ASSERT(ns->netstack_refcnt == 0);
		ASSERT(ns->netstack_numzones == 0);

		/* Finally remove from list of netstacks */
		mutex_enter(&netstack_g_lock);
		found = B_FALSE;
		for (nsp = &netstack_head; *nsp != NULL;
		    nsp = &(*nsp)->netstack_next) {
			if (*nsp == ns) {
				*nsp = ns->netstack_next;
				ns->netstack_next = NULL;
				found = B_TRUE;
				break;
			}
		}
		ASSERT(found);
		mutex_exit(&netstack_g_lock);

		/* Make sure nothing increased the references */
		ASSERT(ns->netstack_refcnt == 0);
		ASSERT(ns->netstack_numzones == 0);

		ASSERT(ns->netstack_flags & NSF_CLOSING);

		for (i = 0; i < NS_MAX; i++) {
			nm_state_t *nms = &ns->netstack_m_state[i];

			cv_destroy(&nms->nms_cv);
		}
		mutex_destroy(&ns->netstack_lock);
		cv_destroy(&ns->netstack_cv);
		kmem_free(ns, sizeof (*ns));
	}
}

void
netstack_hold(netstack_t *ns)
{
	mutex_enter(&ns->netstack_lock);
	ns->netstack_refcnt++;
	ASSERT(ns->netstack_refcnt > 0);
	mutex_exit(&ns->netstack_lock);
	DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
}

/*
 * To support kstat_create_netstack() using kstat_zone_add we need
 * to track both
 * - all zoneids that use the global/shared stack
 * - all kstats that have been added for the shared stack
 */
kstat_t *
kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
    char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
    netstackid_t ks_netstackid)
{
	kstat_t *ks;

	if (ks_netstackid == GLOBAL_NETSTACKID) {
		ks = kstat_create_zone(ks_module, ks_instance, ks_name,
		    ks_class, ks_type, ks_ndata, ks_flags, GLOBAL_ZONEID);
		if (ks != NULL)
			netstack_shared_kstat_add(ks);
		return (ks);
	} else {
		zoneid_t zoneid = ks_netstackid;

		return (kstat_create_zone(ks_module, ks_instance, ks_name,
		    ks_class, ks_type, ks_ndata, ks_flags, zoneid));
	}
}
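
/*
 * Example (a sketch; module "foo" and its kstat are hypothetical): a
 * module creates a per-stack kstat in its create callback using the
 * stackid it was handed, and deletes it in the destroy callback:
 *
 *	ksp = kstat_create_netstack("foo", 0, "foostat", "net",
 *	    KSTAT_TYPE_NAMED, 1, 0, stackid);
 *	...
 *	kstat_delete_netstack(ksp, stackid);
 */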

void
kstat_delete_netstack(kstat_t *ks, netstackid_t ks_netstackid)
{
	if (ks_netstackid == GLOBAL_NETSTACKID) {
		netstack_shared_kstat_remove(ks);
	}
	kstat_delete(ks);
}

static void
netstack_shared_zone_add(zoneid_t zoneid)
{
	struct shared_zone_list *sz;
	struct shared_kstat_list *sk;

	sz = (struct shared_zone_list *)kmem_zalloc(sizeof (*sz), KM_SLEEP);
	sz->sz_zoneid = zoneid;

	/* Insert in list */
	mutex_enter(&netstack_shared_lock);
	sz->sz_next = netstack_shared_zones;
	netstack_shared_zones = sz;

	/*
	 * Perform kstat_zone_add for each existing shared stack kstat.
	 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
	 */
	for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
		kstat_zone_add(sk->sk_kstat, zoneid);
	}
	mutex_exit(&netstack_shared_lock);
}

static void
netstack_shared_zone_remove(zoneid_t zoneid)
{
	struct shared_zone_list **szp, *sz;
	struct shared_kstat_list *sk;

	/* Find in list */
	mutex_enter(&netstack_shared_lock);
	sz = NULL;
	for (szp = &netstack_shared_zones; *szp != NULL;
	    szp = &((*szp)->sz_next)) {
		if ((*szp)->sz_zoneid == zoneid) {
			sz = *szp;
			break;
		}
	}
	/* We must find it */
	ASSERT(sz != NULL);
	*szp = sz->sz_next;
	sz->sz_next = NULL;

	/*
	 * Perform kstat_zone_remove for each existing shared stack kstat.
	 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
	 */
	for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
		kstat_zone_remove(sk->sk_kstat, zoneid);
	}
	mutex_exit(&netstack_shared_lock);

	kmem_free(sz, sizeof (*sz));
}

static void
netstack_shared_kstat_add(kstat_t *ks)
{
	struct shared_zone_list *sz;
	struct shared_kstat_list *sk;

	sk = (struct shared_kstat_list *)kmem_zalloc(sizeof (*sk), KM_SLEEP);
	sk->sk_kstat = ks;

	/* Insert in list */
	mutex_enter(&netstack_shared_lock);
	sk->sk_next = netstack_shared_kstats;
	netstack_shared_kstats = sk;

	/*
	 * Perform kstat_zone_add for each existing shared stack zone.
	 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
	 */
	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
		kstat_zone_add(ks, sz->sz_zoneid);
	}
	mutex_exit(&netstack_shared_lock);
}

static void
netstack_shared_kstat_remove(kstat_t *ks)
{
	struct shared_zone_list *sz;
	struct shared_kstat_list **skp, *sk;

	/* Find in list */
	mutex_enter(&netstack_shared_lock);
	sk = NULL;
	for (skp = &netstack_shared_kstats; *skp != NULL;
	    skp = &((*skp)->sk_next)) {
		if ((*skp)->sk_kstat == ks) {
			sk = *skp;
			break;
		}
	}
	/* Must find it */
	ASSERT(sk != NULL);
	*skp = sk->sk_next;
	sk->sk_next = NULL;

	/*
	 * Perform kstat_zone_remove for each existing shared stack zone.
	 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
	 */
	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
		kstat_zone_remove(ks, sz->sz_zoneid);
	}
	mutex_exit(&netstack_shared_lock);
	kmem_free(sk, sizeof (*sk));
}

/*
 * Return true if a zoneid is using the global/shared stack.
 */
static boolean_t
netstack_find_shared_zoneid(zoneid_t zoneid)
{
	struct shared_zone_list *sz;

	mutex_enter(&netstack_shared_lock);
	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
		if (sz->sz_zoneid == zoneid) {
			mutex_exit(&netstack_shared_lock);
			return (B_TRUE);
		}
	}
	mutex_exit(&netstack_shared_lock);
	return (B_FALSE);
}

/*
 * Hide the fact that zoneids and netstackids are allocated from
 * the same space in the current implementation.
 * We currently do not check that the stackid/zoneids are valid, since there
 * is no need for that. But this should only be done for ids that are
 * valid.
 */
zoneid_t
netstackid_to_zoneid(netstackid_t stackid)
{
	return (stackid);
}

netstackid_t
zoneid_to_netstackid(zoneid_t zoneid)
{
	if (netstack_find_shared_zoneid(zoneid))
		return (GLOBAL_ZONEID);
	else
		return (zoneid);
}

zoneid_t
netstack_get_zoneid(netstack_t *ns)
{
	return (netstackid_to_zoneid(ns->netstack_stackid));
}

/*
 * Simplistic support for walking all the handles.
 * Example usage:
 *	netstack_handle_t nh;
 *	netstack_t *ns;
 *
 *	netstack_next_init(&nh);
 *	while ((ns = netstack_next(&nh)) != NULL) {
 *		do something;
 *		netstack_rele(ns);
 *	}
 *	netstack_next_fini(&nh);
 */
void
netstack_next_init(netstack_handle_t *handle)
{
	*handle = 0;
}

/* ARGSUSED */
void
netstack_next_fini(netstack_handle_t *handle)
{
}

netstack_t *
netstack_next(netstack_handle_t *handle)
{
	netstack_t *ns;
	int i, end;

	end = *handle;
	/* Walk skipping *handle number of instances */

	/* Check whether there is a matching stack instance */
	mutex_enter(&netstack_g_lock);
	ns = netstack_head;
	for (i = 0; i < end; i++) {
		if (ns == NULL)
			break;
		ns = ns->netstack_next;
	}
	/* Skip those that aren't really here */
	while (ns != NULL) {
		mutex_enter(&ns->netstack_lock);
		if ((ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) == 0) {
			mutex_exit(&ns->netstack_lock);
			break;
		}
		mutex_exit(&ns->netstack_lock);
		end++;
		ns = ns->netstack_next;
	}
	if (ns != NULL) {
		*handle = end + 1;
		netstack_hold(ns);
	}
	mutex_exit(&netstack_g_lock);
	return (ns);
}
