xref: /titanic_44/usr/src/uts/common/os/netstack.c (revision 98157a7002f4f2cf7978f3084ca5577f0a1d72b2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/param.h>
30 #include <sys/sysmacros.h>
31 #include <sys/vm.h>
32 #include <sys/proc.h>
33 #include <sys/tuneable.h>
34 #include <sys/systm.h>
35 #include <sys/cmn_err.h>
36 #include <sys/debug.h>
37 #include <sys/sdt.h>
38 #include <sys/mutex.h>
39 #include <sys/bitmap.h>
40 #include <sys/atomic.h>
41 #include <sys/kobj.h>
42 #include <sys/disp.h>
43 #include <vm/seg_kmem.h>
44 #include <sys/zone.h>
45 #include <sys/netstack.h>
46 
47 /*
48  * What we use so that the zones framework can tell us about new zones,
49  * which we use to create new stacks.
50  */
51 static zone_key_t netstack_zone_key;
52 
53 static int	netstack_initialized = 0;
54 
55 /*
56  * Track the registered netstacks.
57  * The global lock protects
58  * - ns_reg
59  * - the list starting at netstack_head and following the netstack_next
60  *   pointers.
61  */
62 static kmutex_t netstack_g_lock;
63 
64 /*
65  * Registry of netstacks with their create/shutdown/destory functions.
66  */
67 static struct netstack_registry	ns_reg[NS_MAX];
68 
69 /*
70  * Global list of existing stacks.  We use this when a new zone with
71  * an exclusive IP instance is created.
72  *
73  * Note that in some cases a netstack_t needs to stay around after the zone
74  * has gone away. This is because there might be outstanding references
75  * (from TCP TIME_WAIT connections, IPsec state, etc). The netstack_t data
76  * structure and all the foo_stack_t's hanging off of it will be cleaned up
77  * when the last reference to it is dropped.
78  * However, the same zone might be rebooted. That is handled using the
79  * assumption that the zones framework picks a new zoneid each time a zone
80  * is (re)booted. We assert for that condition in netstack_zone_create().
81  * Thus the old netstack_t can take its time for things to time out.
82  */
83 static netstack_t *netstack_head;
84 
85 /*
86  * To support kstat_create_netstack() using kstat_zone_add we need
87  * to track both
88  *  - all zoneids that use the global/shared stack
89  *  - all kstats that have been added for the shared stack
90  */
91 struct shared_zone_list {
92 	struct shared_zone_list *sz_next;
93 	zoneid_t		sz_zoneid;
94 };
95 
96 struct shared_kstat_list {
97 	struct shared_kstat_list *sk_next;
98 	kstat_t			 *sk_kstat;
99 };
100 
101 static kmutex_t netstack_shared_lock;	/* protects the following two */
102 static struct shared_zone_list	*netstack_shared_zones;
103 static struct shared_kstat_list	*netstack_shared_kstats;
104 
105 static void	*netstack_zone_create(zoneid_t zoneid);
106 static void	netstack_zone_shutdown(zoneid_t zoneid, void *arg);
107 static void	netstack_zone_destroy(zoneid_t zoneid, void *arg);
108 
109 static void	netstack_shared_zone_add(zoneid_t zoneid);
110 static void	netstack_shared_zone_remove(zoneid_t zoneid);
111 static void	netstack_shared_kstat_add(kstat_t *ks);
112 static void	netstack_shared_kstat_remove(kstat_t *ks);
113 
114 typedef boolean_t applyfn_t(kmutex_t *, netstack_t *, int);
115 
116 static void	apply_all_netstacks(int, applyfn_t *);
117 static void	apply_all_modules(netstack_t *, applyfn_t *);
118 static void	apply_all_modules_reverse(netstack_t *, applyfn_t *);
119 static boolean_t netstack_apply_create(kmutex_t *, netstack_t *, int);
120 static boolean_t netstack_apply_shutdown(kmutex_t *, netstack_t *, int);
121 static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
122 static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
123 static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
124     kmutex_t *);
125 
126 void
127 netstack_init(void)
128 {
129 	mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
130 	mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);
131 
132 	netstack_initialized = 1;
133 
134 	/*
135 	 * We want to be informed each time a zone is created or
136 	 * destroyed in the kernel, so we can maintain the
137 	 * stack instance information.
138 	 */
139 	zone_key_create(&netstack_zone_key, netstack_zone_create,
140 	    netstack_zone_shutdown, netstack_zone_destroy);
141 }
142 
143 /*
144  * Register a new module with the framework.
145  * This registers interest in changes to the set of netstacks.
146  * The createfn and destroyfn are required, but the shutdownfn can be
147  * NULL.
148  * Note that due to the current zsd implementation, when the create
149  * function is called the zone isn't fully present, thus functions
150  * like zone_find_by_* will fail, hence the create function can not
151  * use many zones kernel functions including zcmn_err().
152  */
153 void
154 netstack_register(int moduleid,
155     void *(*module_create)(netstackid_t, netstack_t *),
156     void (*module_shutdown)(netstackid_t, void *),
157     void (*module_destroy)(netstackid_t, void *))
158 {
159 	netstack_t *ns;
160 
161 	ASSERT(netstack_initialized);
162 	ASSERT(moduleid >= 0 && moduleid < NS_MAX);
163 	ASSERT(module_create != NULL);
164 
165 	/*
166 	 * Make instances created after this point in time run the create
167 	 * callback.
168 	 */
169 	mutex_enter(&netstack_g_lock);
170 	ASSERT(ns_reg[moduleid].nr_create == NULL);
171 	ASSERT(ns_reg[moduleid].nr_flags == 0);
172 	ns_reg[moduleid].nr_create = module_create;
173 	ns_reg[moduleid].nr_shutdown = module_shutdown;
174 	ns_reg[moduleid].nr_destroy = module_destroy;
175 	ns_reg[moduleid].nr_flags = NRF_REGISTERED;
176 
177 	/*
178 	 * Determine the set of stacks that exist before we drop the lock.
179 	 * Set NSS_CREATE_NEEDED for each of those.
180 	 * netstacks which have been deleted will have NSS_CREATE_COMPLETED
181 	 * set, but check NSF_CLOSING to be sure.
182 	 */
183 	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
184 		nm_state_t *nms = &ns->netstack_m_state[moduleid];
185 
186 		mutex_enter(&ns->netstack_lock);
187 		if (!(ns->netstack_flags & NSF_CLOSING) &&
188 		    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
189 			nms->nms_flags |= NSS_CREATE_NEEDED;
190 			DTRACE_PROBE2(netstack__create__needed,
191 			    netstack_t *, ns, int, moduleid);
192 		}
193 		mutex_exit(&ns->netstack_lock);
194 	}
195 	mutex_exit(&netstack_g_lock);
196 
197 	/*
198 	 * At this point in time a new instance can be created or an instance
199 	 * can be destroyed, or some other module can register or unregister.
200 	 * Make sure we either run all the create functions for this moduleid
201 	 * or we wait for any other creators for this moduleid.
202 	 */
203 	apply_all_netstacks(moduleid, netstack_apply_create);
204 }
205 
206 void
207 netstack_unregister(int moduleid)
208 {
209 	netstack_t *ns;
210 
211 	ASSERT(moduleid >= 0 && moduleid < NS_MAX);
212 
213 	ASSERT(ns_reg[moduleid].nr_create != NULL);
214 	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
215 
216 	mutex_enter(&netstack_g_lock);
217 	/*
218 	 * Determine the set of stacks that exist before we drop the lock.
219 	 * Set NSS_SHUTDOWN_NEEDED and NSS_DESTROY_NEEDED for each of those.
220 	 * That ensures that when we return all the callbacks for existing
221 	 * instances have completed. And since we set NRF_DYING no new
222 	 * instances can use this module.
223 	 */
224 	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
225 		nm_state_t *nms = &ns->netstack_m_state[moduleid];
226 
227 		mutex_enter(&ns->netstack_lock);
228 		if (ns_reg[moduleid].nr_shutdown != NULL &&
229 		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
230 		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
231 			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
232 			DTRACE_PROBE2(netstack__shutdown__needed,
233 			    netstack_t *, ns, int, moduleid);
234 		}
235 		if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) &&
236 		    ns_reg[moduleid].nr_destroy != NULL &&
237 		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
238 		    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
239 			nms->nms_flags |= NSS_DESTROY_NEEDED;
240 			DTRACE_PROBE2(netstack__destroy__needed,
241 			    netstack_t *, ns, int, moduleid);
242 		}
243 		mutex_exit(&ns->netstack_lock);
244 	}
245 	/*
246 	 * Prevent any new netstack from calling the registered create
247 	 * function, while keeping the function pointers in place until the
248 	 * shutdown and destroy callbacks are complete.
249 	 */
250 	ns_reg[moduleid].nr_flags |= NRF_DYING;
251 	mutex_exit(&netstack_g_lock);
252 
253 	apply_all_netstacks(moduleid, netstack_apply_shutdown);
254 	apply_all_netstacks(moduleid, netstack_apply_destroy);
255 
256 	/*
257 	 * Clear the nms_flags so that we can handle this module
258 	 * being loaded again.
259 	 * Also remove the registered functions.
260 	 */
261 	mutex_enter(&netstack_g_lock);
262 	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
263 	ASSERT(ns_reg[moduleid].nr_flags & NRF_DYING);
264 	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
265 		nm_state_t *nms = &ns->netstack_m_state[moduleid];
266 
267 		mutex_enter(&ns->netstack_lock);
268 		if (nms->nms_flags & NSS_DESTROY_COMPLETED) {
269 			nms->nms_flags = 0;
270 			DTRACE_PROBE2(netstack__destroy__done,
271 			    netstack_t *, ns, int, moduleid);
272 		}
273 		mutex_exit(&ns->netstack_lock);
274 	}
275 
276 	ns_reg[moduleid].nr_create = NULL;
277 	ns_reg[moduleid].nr_shutdown = NULL;
278 	ns_reg[moduleid].nr_destroy = NULL;
279 	ns_reg[moduleid].nr_flags = 0;
280 	mutex_exit(&netstack_g_lock);
281 }
282 
283 /*
284  * Lookup and/or allocate a netstack for this zone.
285  */
286 static void *
287 netstack_zone_create(zoneid_t zoneid)
288 {
289 	netstackid_t stackid;
290 	netstack_t *ns;
291 	netstack_t **nsp;
292 	zone_t	*zone;
293 	int i;
294 
295 	ASSERT(netstack_initialized);
296 
297 	zone = zone_find_by_id_nolock(zoneid);
298 	ASSERT(zone != NULL);
299 
300 	if (zone->zone_flags & ZF_NET_EXCL) {
301 		stackid = zoneid;
302 	} else {
303 		/* Look for the stack instance for the global */
304 		stackid = GLOBAL_NETSTACKID;
305 	}
306 
307 	/* Allocate even if it isn't needed; simplifies locking */
308 	ns = (netstack_t *)kmem_zalloc(sizeof (netstack_t), KM_SLEEP);
309 
310 	/* Look if there is a matching stack instance */
311 	mutex_enter(&netstack_g_lock);
312 	for (nsp = &netstack_head; *nsp != NULL;
313 	    nsp = &((*nsp)->netstack_next)) {
314 		if ((*nsp)->netstack_stackid == stackid) {
315 			/*
316 			 * Should never find a pre-existing exclusive stack
317 			 */
318 			ASSERT(stackid == GLOBAL_NETSTACKID);
319 			kmem_free(ns, sizeof (netstack_t));
320 			ns = *nsp;
321 			mutex_enter(&ns->netstack_lock);
322 			ns->netstack_numzones++;
323 			mutex_exit(&ns->netstack_lock);
324 			mutex_exit(&netstack_g_lock);
325 			DTRACE_PROBE1(netstack__inc__numzones,
326 			    netstack_t *, ns);
327 			/* Record that we have a new shared stack zone */
328 			netstack_shared_zone_add(zoneid);
329 			zone->zone_netstack = ns;
330 			return (ns);
331 		}
332 	}
333 	/* Not found */
334 	mutex_init(&ns->netstack_lock, NULL, MUTEX_DEFAULT, NULL);
335 	cv_init(&ns->netstack_cv, NULL, CV_DEFAULT, NULL);
336 	ns->netstack_stackid = zoneid;
337 	ns->netstack_numzones = 1;
338 	ns->netstack_refcnt = 1; /* Decremented by netstack_zone_destroy */
339 	ns->netstack_flags = NSF_UNINIT;
340 	*nsp = ns;
341 	zone->zone_netstack = ns;
342 
343 	mutex_enter(&ns->netstack_lock);
344 	/*
345 	 * Mark this netstack as having a CREATE running so
346 	 * any netstack_register/netstack_unregister waits for
347 	 * the existing create callbacks to complete in moduleid order
348 	 */
349 	ns->netstack_flags |= NSF_ZONE_CREATE;
350 
351 	/*
352 	 * Determine the set of module create functions that need to be
353 	 * called before we drop the lock.
354 	 * Set NSS_CREATE_NEEDED for each of those.
355 	 * Skip any with NRF_DYING set, since those are in the process of
356 	 * going away, by checking for flags being exactly NRF_REGISTERED.
357 	 */
358 	for (i = 0; i < NS_MAX; i++) {
359 		nm_state_t *nms = &ns->netstack_m_state[i];
360 
361 		cv_init(&nms->nms_cv, NULL, CV_DEFAULT, NULL);
362 
363 		if ((ns_reg[i].nr_flags == NRF_REGISTERED) &&
364 		    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
365 			nms->nms_flags |= NSS_CREATE_NEEDED;
366 			DTRACE_PROBE2(netstack__create__needed,
367 			    netstack_t *, ns, int, i);
368 		}
369 	}
370 	mutex_exit(&ns->netstack_lock);
371 	mutex_exit(&netstack_g_lock);
372 
373 	apply_all_modules(ns, netstack_apply_create);
374 
375 	/* Tell any waiting netstack_register/netstack_unregister to proceed */
376 	mutex_enter(&ns->netstack_lock);
377 	ns->netstack_flags &= ~NSF_UNINIT;
378 	ASSERT(ns->netstack_flags & NSF_ZONE_CREATE);
379 	ns->netstack_flags &= ~NSF_ZONE_CREATE;
380 	cv_broadcast(&ns->netstack_cv);
381 	mutex_exit(&ns->netstack_lock);
382 
383 	return (ns);
384 }
385 
386 /* ARGSUSED */
387 static void
388 netstack_zone_shutdown(zoneid_t zoneid, void *arg)
389 {
390 	netstack_t *ns = (netstack_t *)arg;
391 	int i;
392 
393 	ASSERT(arg != NULL);
394 
395 	mutex_enter(&ns->netstack_lock);
396 	ASSERT(ns->netstack_numzones > 0);
397 	if (ns->netstack_numzones != 1) {
398 		/* Stack instance being used by other zone */
399 		mutex_exit(&ns->netstack_lock);
400 		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
401 		return;
402 	}
403 	mutex_exit(&ns->netstack_lock);
404 
405 	mutex_enter(&netstack_g_lock);
406 	mutex_enter(&ns->netstack_lock);
407 	/*
408 	 * Mark this netstack as having a SHUTDOWN running so
409 	 * any netstack_register/netstack_unregister waits for
410 	 * the existing create callbacks to complete in moduleid order
411 	 */
412 	ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
413 	ns->netstack_flags |= NSF_ZONE_SHUTDOWN;
414 
415 	/*
416 	 * Determine the set of stacks that exist before we drop the lock.
417 	 * Set NSS_SHUTDOWN_NEEDED for each of those.
418 	 */
419 	for (i = 0; i < NS_MAX; i++) {
420 		nm_state_t *nms = &ns->netstack_m_state[i];
421 
422 		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
423 		    ns_reg[i].nr_shutdown != NULL &&
424 		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
425 		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
426 			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
427 			DTRACE_PROBE2(netstack__shutdown__needed,
428 			    netstack_t *, ns, int, i);
429 		}
430 	}
431 	mutex_exit(&ns->netstack_lock);
432 	mutex_exit(&netstack_g_lock);
433 
434 	/*
435 	 * Call the shutdown function for all registered modules for this
436 	 * netstack.
437 	 */
438 	apply_all_modules(ns, netstack_apply_shutdown);
439 
440 	/* Tell any waiting netstack_register/netstack_unregister to proceed */
441 	mutex_enter(&ns->netstack_lock);
442 	ASSERT(ns->netstack_flags & NSF_ZONE_SHUTDOWN);
443 	ns->netstack_flags &= ~NSF_ZONE_SHUTDOWN;
444 	cv_broadcast(&ns->netstack_cv);
445 	mutex_exit(&ns->netstack_lock);
446 }
447 
448 /*
449  * Common routine to release a zone.
450  * If this was the last zone using the stack instance then prepare to
451  * have the refcnt dropping to zero free the zone.
452  */
453 /* ARGSUSED */
454 static void
455 netstack_zone_destroy(zoneid_t zoneid, void *arg)
456 {
457 	netstack_t *ns = (netstack_t *)arg;
458 
459 	ASSERT(arg != NULL);
460 
461 	mutex_enter(&ns->netstack_lock);
462 	ASSERT(ns->netstack_numzones > 0);
463 	ns->netstack_numzones--;
464 	if (ns->netstack_numzones != 0) {
465 		/* Stack instance being used by other zone */
466 		mutex_exit(&ns->netstack_lock);
467 		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
468 		/* Record that we a shared stack zone has gone away */
469 		netstack_shared_zone_remove(zoneid);
470 		return;
471 	}
472 	/*
473 	 * Set CLOSING so that netstack_find_by will not find it.
474 	 */
475 	ns->netstack_flags |= NSF_CLOSING;
476 	mutex_exit(&ns->netstack_lock);
477 	DTRACE_PROBE1(netstack__dec__numzones, netstack_t *, ns);
478 	/* No other thread can call zone_destroy for this stack */
479 
480 	/*
481 	 * Decrease refcnt to account for the one in netstack_zone_init()
482 	 */
483 	netstack_rele(ns);
484 }
485 
486 /*
487  * Called when the reference count drops to zero.
488  * Call the destroy functions for each registered module.
489  */
490 static void
491 netstack_stack_inactive(netstack_t *ns)
492 {
493 	int i;
494 
495 	mutex_enter(&netstack_g_lock);
496 	mutex_enter(&ns->netstack_lock);
497 	/*
498 	 * Mark this netstack as having a DESTROY running so
499 	 * any netstack_register/netstack_unregister waits for
500 	 * the existing destroy callbacks to complete in reverse moduleid order
501 	 */
502 	ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
503 	ns->netstack_flags |= NSF_ZONE_DESTROY;
504 	/*
505 	 * If the shutdown callback wasn't called earlier (e.g., if this is
506 	 * a netstack shared between multiple zones), then we schedule it now.
507 	 *
508 	 * Determine the set of stacks that exist before we drop the lock.
509 	 * Set NSS_DESTROY_NEEDED for each of those. That
510 	 * ensures that when we return all the callbacks for existing
511 	 * instances have completed.
512 	 */
513 	for (i = 0; i < NS_MAX; i++) {
514 		nm_state_t *nms = &ns->netstack_m_state[i];
515 
516 		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
517 		    ns_reg[i].nr_shutdown != NULL &&
518 		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
519 		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
520 			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
521 			DTRACE_PROBE2(netstack__shutdown__needed,
522 			    netstack_t *, ns, int, i);
523 		}
524 
525 		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
526 		    ns_reg[i].nr_destroy != NULL &&
527 		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
528 		    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
529 			nms->nms_flags |= NSS_DESTROY_NEEDED;
530 			DTRACE_PROBE2(netstack__destroy__needed,
531 			    netstack_t *, ns, int, i);
532 		}
533 	}
534 	mutex_exit(&ns->netstack_lock);
535 	mutex_exit(&netstack_g_lock);
536 
537 	/*
538 	 * Call the shutdown and destroy functions for all registered modules
539 	 * for this netstack.
540 	 *
541 	 * Since there are some ordering dependencies between the modules we
542 	 * tear them down in the reverse order of what was used to create them.
543 	 *
544 	 * Since a netstack_t is never reused (when a zone is rebooted it gets
545 	 * a new zoneid == netstackid i.e. a new netstack_t is allocated) we
546 	 * leave nms_flags the way it is i.e. with NSS_DESTROY_COMPLETED set.
547 	 * That is different than in the netstack_unregister() case.
548 	 */
549 	apply_all_modules(ns, netstack_apply_shutdown);
550 	apply_all_modules_reverse(ns, netstack_apply_destroy);
551 
552 	/* Tell any waiting netstack_register/netstack_unregister to proceed */
553 	mutex_enter(&ns->netstack_lock);
554 	ASSERT(ns->netstack_flags & NSF_ZONE_DESTROY);
555 	ns->netstack_flags &= ~NSF_ZONE_DESTROY;
556 	cv_broadcast(&ns->netstack_cv);
557 	mutex_exit(&ns->netstack_lock);
558 }
559 
560 /*
561  * Apply a function to all netstacks for a particular moduleid.
562  *
563  * If there is any zone activity (due to a zone being created, shutdown,
564  * or destroyed) we wait for that to complete before we proceed. This ensures
565  * that the moduleids are processed in order when a zone is created or
566  * destroyed.
567  *
568  * The applyfn has to drop netstack_g_lock if it does some work.
569  * In that case we don't follow netstack_next,
570  * even if it is possible to do so without any hazards. This is
571  * because we want the design to allow for the list of netstacks threaded
572  * by netstack_next to change in any arbitrary way during the time the
573  * lock was dropped.
574  *
575  * It is safe to restart the loop at netstack_head since the applyfn
576  * changes netstack_m_state as it processes things, so a subsequent
577  * pass through will have no effect in applyfn, hence the loop will terminate
578  * in at worst O(N^2).
579  */
580 static void
581 apply_all_netstacks(int moduleid, applyfn_t *applyfn)
582 {
583 	netstack_t *ns;
584 
585 	mutex_enter(&netstack_g_lock);
586 	ns = netstack_head;
587 	while (ns != NULL) {
588 		if (wait_for_zone_creator(ns, &netstack_g_lock)) {
589 			/* Lock dropped - restart at head */
590 			ns = netstack_head;
591 		} else if ((applyfn)(&netstack_g_lock, ns, moduleid)) {
592 			/* Lock dropped - restart at head */
593 			ns = netstack_head;
594 		} else {
595 			ns = ns->netstack_next;
596 		}
597 	}
598 	mutex_exit(&netstack_g_lock);
599 }
600 
601 /*
602  * Apply a function to all moduleids for a particular netstack.
603  *
604  * Since the netstack linkage doesn't matter in this case we can
605  * ignore whether the function drops the lock.
606  */
607 static void
608 apply_all_modules(netstack_t *ns, applyfn_t *applyfn)
609 {
610 	int i;
611 
612 	mutex_enter(&netstack_g_lock);
613 	for (i = 0; i < NS_MAX; i++) {
614 		/*
615 		 * We don't care whether the lock was dropped
616 		 * since we are not iterating over netstack_head.
617 		 */
618 		(void) (applyfn)(&netstack_g_lock, ns, i);
619 	}
620 	mutex_exit(&netstack_g_lock);
621 }
622 
623 /* Like the above but in reverse moduleid order */
624 static void
625 apply_all_modules_reverse(netstack_t *ns, applyfn_t *applyfn)
626 {
627 	int i;
628 
629 	mutex_enter(&netstack_g_lock);
630 	for (i = NS_MAX-1; i >= 0; i--) {
631 		/*
632 		 * We don't care whether the lock was dropped
633 		 * since we are not iterating over netstack_head.
634 		 */
635 		(void) (applyfn)(&netstack_g_lock, ns, i);
636 	}
637 	mutex_exit(&netstack_g_lock);
638 }
639 
640 /*
641  * Call the create function for the ns and moduleid if CREATE_NEEDED
642  * is set.
643  * If some other thread gets here first and sets *_INPROGRESS, then
644  * we wait for that thread to complete so that we can ensure that
645  * all the callbacks are done when we've looped over all netstacks/moduleids.
646  *
647  * When we call the create function, we temporarily drop the netstack_lock
648  * held by the caller, and return true to tell the caller it needs to
649  * re-evalute the state.
650  */
651 static boolean_t
652 netstack_apply_create(kmutex_t *lockp, netstack_t *ns, int moduleid)
653 {
654 	void *result;
655 	netstackid_t stackid;
656 	nm_state_t *nms = &ns->netstack_m_state[moduleid];
657 	boolean_t dropped = B_FALSE;
658 
659 	ASSERT(MUTEX_HELD(lockp));
660 	mutex_enter(&ns->netstack_lock);
661 
662 	if (wait_for_nms_inprogress(ns, nms, lockp))
663 		dropped = B_TRUE;
664 
665 	if (nms->nms_flags & NSS_CREATE_NEEDED) {
666 		nms->nms_flags &= ~NSS_CREATE_NEEDED;
667 		nms->nms_flags |= NSS_CREATE_INPROGRESS;
668 		DTRACE_PROBE2(netstack__create__inprogress,
669 		    netstack_t *, ns, int, moduleid);
670 		mutex_exit(&ns->netstack_lock);
671 		mutex_exit(lockp);
672 		dropped = B_TRUE;
673 
674 		ASSERT(ns_reg[moduleid].nr_create != NULL);
675 		stackid = ns->netstack_stackid;
676 		DTRACE_PROBE2(netstack__create__start,
677 		    netstackid_t, stackid,
678 		    netstack_t *, ns);
679 		result = (ns_reg[moduleid].nr_create)(stackid, ns);
680 		DTRACE_PROBE2(netstack__create__end,
681 		    void *, result, netstack_t *, ns);
682 
683 		ASSERT(result != NULL);
684 		mutex_enter(lockp);
685 		mutex_enter(&ns->netstack_lock);
686 		ns->netstack_modules[moduleid] = result;
687 		nms->nms_flags &= ~NSS_CREATE_INPROGRESS;
688 		nms->nms_flags |= NSS_CREATE_COMPLETED;
689 		cv_broadcast(&nms->nms_cv);
690 		DTRACE_PROBE2(netstack__create__completed,
691 		    netstack_t *, ns, int, moduleid);
692 		mutex_exit(&ns->netstack_lock);
693 		return (dropped);
694 	} else {
695 		mutex_exit(&ns->netstack_lock);
696 		return (dropped);
697 	}
698 }
699 
700 /*
701  * Call the shutdown function for the ns and moduleid if SHUTDOWN_NEEDED
702  * is set.
703  * If some other thread gets here first and sets *_INPROGRESS, then
704  * we wait for that thread to complete so that we can ensure that
705  * all the callbacks are done when we've looped over all netstacks/moduleids.
706  *
707  * When we call the shutdown function, we temporarily drop the netstack_lock
708  * held by the caller, and return true to tell the caller it needs to
709  * re-evalute the state.
710  */
711 static boolean_t
712 netstack_apply_shutdown(kmutex_t *lockp, netstack_t *ns, int moduleid)
713 {
714 	netstackid_t stackid;
715 	void * netstack_module;
716 	nm_state_t *nms = &ns->netstack_m_state[moduleid];
717 	boolean_t dropped = B_FALSE;
718 
719 	ASSERT(MUTEX_HELD(lockp));
720 	mutex_enter(&ns->netstack_lock);
721 
722 	if (wait_for_nms_inprogress(ns, nms, lockp))
723 		dropped = B_TRUE;
724 
725 	if (nms->nms_flags & NSS_SHUTDOWN_NEEDED) {
726 		nms->nms_flags &= ~NSS_SHUTDOWN_NEEDED;
727 		nms->nms_flags |= NSS_SHUTDOWN_INPROGRESS;
728 		DTRACE_PROBE2(netstack__shutdown__inprogress,
729 		    netstack_t *, ns, int, moduleid);
730 		mutex_exit(&ns->netstack_lock);
731 		mutex_exit(lockp);
732 		dropped = B_TRUE;
733 
734 		ASSERT(ns_reg[moduleid].nr_shutdown != NULL);
735 		stackid = ns->netstack_stackid;
736 		netstack_module = ns->netstack_modules[moduleid];
737 		DTRACE_PROBE2(netstack__shutdown__start,
738 		    netstackid_t, stackid,
739 		    void *, netstack_module);
740 		(ns_reg[moduleid].nr_shutdown)(stackid, netstack_module);
741 		DTRACE_PROBE1(netstack__shutdown__end,
742 		    netstack_t *, ns);
743 
744 		mutex_enter(lockp);
745 		mutex_enter(&ns->netstack_lock);
746 		nms->nms_flags &= ~NSS_SHUTDOWN_INPROGRESS;
747 		nms->nms_flags |= NSS_SHUTDOWN_COMPLETED;
748 		cv_broadcast(&nms->nms_cv);
749 		DTRACE_PROBE2(netstack__shutdown__completed,
750 		    netstack_t *, ns, int, moduleid);
751 		mutex_exit(&ns->netstack_lock);
752 		return (dropped);
753 	} else {
754 		mutex_exit(&ns->netstack_lock);
755 		return (dropped);
756 	}
757 }
758 
759 /*
760  * Call the destroy function for the ns and moduleid if DESTROY_NEEDED
761  * is set.
762  * If some other thread gets here first and sets *_INPROGRESS, then
763  * we wait for that thread to complete so that we can ensure that
764  * all the callbacks are done when we've looped over all netstacks/moduleids.
765  *
766  * When we call the destroy function, we temporarily drop the netstack_lock
767  * held by the caller, and return true to tell the caller it needs to
768  * re-evalute the state.
769  */
770 static boolean_t
771 netstack_apply_destroy(kmutex_t *lockp, netstack_t *ns, int moduleid)
772 {
773 	netstackid_t stackid;
774 	void * netstack_module;
775 	nm_state_t *nms = &ns->netstack_m_state[moduleid];
776 	boolean_t dropped = B_FALSE;
777 
778 	ASSERT(MUTEX_HELD(lockp));
779 	mutex_enter(&ns->netstack_lock);
780 
781 	if (wait_for_nms_inprogress(ns, nms, lockp))
782 		dropped = B_TRUE;
783 
784 	if (nms->nms_flags & NSS_DESTROY_NEEDED) {
785 		nms->nms_flags &= ~NSS_DESTROY_NEEDED;
786 		nms->nms_flags |= NSS_DESTROY_INPROGRESS;
787 		DTRACE_PROBE2(netstack__destroy__inprogress,
788 		    netstack_t *, ns, int, moduleid);
789 		mutex_exit(&ns->netstack_lock);
790 		mutex_exit(lockp);
791 		dropped = B_TRUE;
792 
793 		ASSERT(ns_reg[moduleid].nr_destroy != NULL);
794 		stackid = ns->netstack_stackid;
795 		netstack_module = ns->netstack_modules[moduleid];
796 		DTRACE_PROBE2(netstack__destroy__start,
797 		    netstackid_t, stackid,
798 		    void *, netstack_module);
799 		(ns_reg[moduleid].nr_destroy)(stackid, netstack_module);
800 		DTRACE_PROBE1(netstack__destroy__end,
801 		    netstack_t *, ns);
802 
803 		mutex_enter(lockp);
804 		mutex_enter(&ns->netstack_lock);
805 		ns->netstack_modules[moduleid] = NULL;
806 		nms->nms_flags &= ~NSS_DESTROY_INPROGRESS;
807 		nms->nms_flags |= NSS_DESTROY_COMPLETED;
808 		cv_broadcast(&nms->nms_cv);
809 		DTRACE_PROBE2(netstack__destroy__completed,
810 		    netstack_t *, ns, int, moduleid);
811 		mutex_exit(&ns->netstack_lock);
812 		return (dropped);
813 	} else {
814 		mutex_exit(&ns->netstack_lock);
815 		return (dropped);
816 	}
817 }
818 
819 /*
820  * If somebody  is creating the netstack (due to a new zone being created)
821  * then we wait for them to complete. This ensures that any additional
822  * netstack_register() doesn't cause the create functions to run out of
823  * order.
824  * Note that we do not need such a global wait in the case of the shutdown
825  * and destroy callbacks, since in that case it is sufficient for both
826  * threads to set NEEDED and wait for INPROGRESS to ensure ordering.
827  * Returns true if lockp was temporarily dropped while waiting.
828  */
829 static boolean_t
830 wait_for_zone_creator(netstack_t *ns, kmutex_t *lockp)
831 {
832 	boolean_t dropped = B_FALSE;
833 
834 	mutex_enter(&ns->netstack_lock);
835 	while (ns->netstack_flags & NSF_ZONE_CREATE) {
836 		DTRACE_PROBE1(netstack__wait__zone__inprogress,
837 		    netstack_t *, ns);
838 		if (lockp != NULL) {
839 			dropped = B_TRUE;
840 			mutex_exit(lockp);
841 		}
842 		cv_wait(&ns->netstack_cv, &ns->netstack_lock);
843 		if (lockp != NULL) {
844 			/* First drop netstack_lock to preserve order */
845 			mutex_exit(&ns->netstack_lock);
846 			mutex_enter(lockp);
847 			mutex_enter(&ns->netstack_lock);
848 		}
849 	}
850 	mutex_exit(&ns->netstack_lock);
851 	return (dropped);
852 }
853 
854 /*
855  * Wait for any INPROGRESS flag to be cleared for the netstack/moduleid
856  * combination.
857  * Returns true if lockp was temporarily dropped while waiting.
858  */
859 static boolean_t
860 wait_for_nms_inprogress(netstack_t *ns, nm_state_t *nms, kmutex_t *lockp)
861 {
862 	boolean_t dropped = B_FALSE;
863 
864 	while (nms->nms_flags & NSS_ALL_INPROGRESS) {
865 		DTRACE_PROBE2(netstack__wait__nms__inprogress,
866 		    netstack_t *, ns, nm_state_t *, nms);
867 		if (lockp != NULL) {
868 			dropped = B_TRUE;
869 			mutex_exit(lockp);
870 		}
871 		cv_wait(&nms->nms_cv, &ns->netstack_lock);
872 		if (lockp != NULL) {
873 			/* First drop netstack_lock to preserve order */
874 			mutex_exit(&ns->netstack_lock);
875 			mutex_enter(lockp);
876 			mutex_enter(&ns->netstack_lock);
877 		}
878 	}
879 	return (dropped);
880 }
881 
882 /*
883  * Get the stack instance used in caller's zone.
884  * Increases the reference count, caller must do a netstack_rele.
885  * It can't be called after zone_destroy() has started.
886  */
887 netstack_t *
888 netstack_get_current(void)
889 {
890 	netstack_t *ns;
891 
892 	ns = curproc->p_zone->zone_netstack;
893 	ASSERT(ns != NULL);
894 	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
895 		return (NULL);
896 
897 	netstack_hold(ns);
898 
899 	return (ns);
900 }
901 
902 /*
903  * Find a stack instance given the cred.
904  * This is used by the modules to potentially allow for a future when
905  * something other than the zoneid is used to determine the stack.
906  */
907 netstack_t *
908 netstack_find_by_cred(const cred_t *cr)
909 {
910 	zoneid_t zoneid = crgetzoneid(cr);
911 
912 	/* Handle the case when cr_zone is NULL */
913 	if (zoneid == (zoneid_t)-1)
914 		zoneid = GLOBAL_ZONEID;
915 
916 	/* For performance ... */
917 	if (curproc->p_zone->zone_id == zoneid)
918 		return (netstack_get_current());
919 	else
920 		return (netstack_find_by_zoneid(zoneid));
921 }
922 
923 /*
924  * Find a stack instance given the zoneid.
925  * Increases the reference count if found; caller must do a
926  * netstack_rele().
927  *
928  * If there is no exact match then assume the shared stack instance
929  * matches.
930  *
931  * Skip the unitialized ones.
932  */
933 netstack_t *
934 netstack_find_by_zoneid(zoneid_t zoneid)
935 {
936 	netstack_t *ns;
937 	zone_t *zone;
938 
939 	zone = zone_find_by_id(zoneid);
940 
941 	if (zone == NULL)
942 		return (NULL);
943 
944 	ns = zone->zone_netstack;
945 	ASSERT(ns != NULL);
946 	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
947 		ns = NULL;
948 	else
949 		netstack_hold(ns);
950 
951 	zone_rele(zone);
952 	return (ns);
953 }
954 
955 /*
956  * Find a stack instance given the zoneid. Can only be called from
957  * the create callback. See the comments in zone_find_by_id_nolock why
958  * that limitation exists.
959  *
960  * Increases the reference count if found; caller must do a
961  * netstack_rele().
962  *
963  * If there is no exact match then assume the shared stack instance
964  * matches.
965  *
966  * Skip the unitialized ones.
967  */
968 netstack_t *
969 netstack_find_by_zoneid_nolock(zoneid_t zoneid)
970 {
971 	netstack_t *ns;
972 	zone_t *zone;
973 
974 	zone = zone_find_by_id_nolock(zoneid);
975 
976 	if (zone == NULL)
977 		return (NULL);
978 
979 	ns = zone->zone_netstack;
980 	ASSERT(ns != NULL);
981 
982 	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
983 		ns = NULL;
984 	else
985 		netstack_hold(ns);
986 
987 	/* zone_find_by_id_nolock does not have a hold on the zone */
988 	return (ns);
989 }
990 
991 /*
992  * Find a stack instance given the stackid with exact match?
993  * Increases the reference count if found; caller must do a
994  * netstack_rele().
995  *
996  * Skip the unitialized ones.
997  */
998 netstack_t *
999 netstack_find_by_stackid(netstackid_t stackid)
1000 {
1001 	netstack_t *ns;
1002 
1003 	mutex_enter(&netstack_g_lock);
1004 	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
1005 		mutex_enter(&ns->netstack_lock);
1006 		if (ns->netstack_stackid == stackid &&
1007 		    !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
1008 			mutex_exit(&ns->netstack_lock);
1009 			netstack_hold(ns);
1010 			mutex_exit(&netstack_g_lock);
1011 			return (ns);
1012 		}
1013 		mutex_exit(&ns->netstack_lock);
1014 	}
1015 	mutex_exit(&netstack_g_lock);
1016 	return (NULL);
1017 }
1018 
1019 void
1020 netstack_rele(netstack_t *ns)
1021 {
1022 	netstack_t **nsp;
1023 	boolean_t found;
1024 	int refcnt, numzones;
1025 	int i;
1026 
1027 	mutex_enter(&ns->netstack_lock);
1028 	ASSERT(ns->netstack_refcnt > 0);
1029 	ns->netstack_refcnt--;
1030 	/*
1031 	 * As we drop the lock additional netstack_rele()s can come in
1032 	 * and decrement the refcnt to zero and free the netstack_t.
1033 	 * Store pointers in local variables and if we were not the last
1034 	 * then don't reference the netstack_t after that.
1035 	 */
1036 	refcnt = ns->netstack_refcnt;
1037 	numzones = ns->netstack_numzones;
1038 	DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
1039 	mutex_exit(&ns->netstack_lock);
1040 
1041 	if (refcnt == 0 && numzones == 0) {
1042 		/*
1043 		 * Time to call the destroy functions and free up
1044 		 * the structure
1045 		 */
1046 		netstack_stack_inactive(ns);
1047 
1048 		/* Make sure nothing increased the references */
1049 		ASSERT(ns->netstack_refcnt == 0);
1050 		ASSERT(ns->netstack_numzones == 0);
1051 
1052 		/* Finally remove from list of netstacks */
1053 		mutex_enter(&netstack_g_lock);
1054 		found = B_FALSE;
1055 		for (nsp = &netstack_head; *nsp != NULL;
1056 		    nsp = &(*nsp)->netstack_next) {
1057 			if (*nsp == ns) {
1058 				*nsp = ns->netstack_next;
1059 				ns->netstack_next = NULL;
1060 				found = B_TRUE;
1061 				break;
1062 			}
1063 		}
1064 		ASSERT(found);
1065 		mutex_exit(&netstack_g_lock);
1066 
1067 		/* Make sure nothing increased the references */
1068 		ASSERT(ns->netstack_refcnt == 0);
1069 		ASSERT(ns->netstack_numzones == 0);
1070 
1071 		ASSERT(ns->netstack_flags & NSF_CLOSING);
1072 
1073 		for (i = 0; i < NS_MAX; i++) {
1074 			nm_state_t *nms = &ns->netstack_m_state[i];
1075 
1076 			cv_destroy(&nms->nms_cv);
1077 		}
1078 		mutex_destroy(&ns->netstack_lock);
1079 		cv_destroy(&ns->netstack_cv);
1080 		kmem_free(ns, sizeof (*ns));
1081 	}
1082 }
1083 
1084 void
1085 netstack_hold(netstack_t *ns)
1086 {
1087 	mutex_enter(&ns->netstack_lock);
1088 	ns->netstack_refcnt++;
1089 	ASSERT(ns->netstack_refcnt > 0);
1090 	mutex_exit(&ns->netstack_lock);
1091 	DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
1092 }
1093 
1094 /*
1095  * To support kstat_create_netstack() using kstat_zone_add we need
1096  * to track both
1097  *  - all zoneids that use the global/shared stack
1098  *  - all kstats that have been added for the shared stack
1099  */
1100 kstat_t *
1101 kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
1102     char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
1103     netstackid_t ks_netstackid)
1104 {
1105 	kstat_t *ks;
1106 
1107 	if (ks_netstackid == GLOBAL_NETSTACKID) {
1108 		ks = kstat_create_zone(ks_module, ks_instance, ks_name,
1109 		    ks_class, ks_type, ks_ndata, ks_flags, GLOBAL_ZONEID);
1110 		if (ks != NULL)
1111 			netstack_shared_kstat_add(ks);
1112 		return (ks);
1113 	} else {
1114 		zoneid_t zoneid = ks_netstackid;
1115 
1116 		return (kstat_create_zone(ks_module, ks_instance, ks_name,
1117 		    ks_class, ks_type, ks_ndata, ks_flags, zoneid));
1118 	}
1119 }
1120 
1121 void
1122 kstat_delete_netstack(kstat_t *ks, netstackid_t ks_netstackid)
1123 {
1124 	if (ks_netstackid == GLOBAL_NETSTACKID) {
1125 		netstack_shared_kstat_remove(ks);
1126 	}
1127 	kstat_delete(ks);
1128 }
1129 
1130 static void
1131 netstack_shared_zone_add(zoneid_t zoneid)
1132 {
1133 	struct shared_zone_list *sz;
1134 	struct shared_kstat_list *sk;
1135 
1136 	sz = (struct shared_zone_list *)kmem_zalloc(sizeof (*sz), KM_SLEEP);
1137 	sz->sz_zoneid = zoneid;
1138 
1139 	/* Insert in list */
1140 	mutex_enter(&netstack_shared_lock);
1141 	sz->sz_next = netstack_shared_zones;
1142 	netstack_shared_zones = sz;
1143 
1144 	/*
1145 	 * Perform kstat_zone_add for each existing shared stack kstat.
1146 	 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
1147 	 */
1148 	for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
1149 		kstat_zone_add(sk->sk_kstat, zoneid);
1150 	}
1151 	mutex_exit(&netstack_shared_lock);
1152 }
1153 
1154 static void
1155 netstack_shared_zone_remove(zoneid_t zoneid)
1156 {
1157 	struct shared_zone_list **szp, *sz;
1158 	struct shared_kstat_list *sk;
1159 
1160 	/* Find in list */
1161 	mutex_enter(&netstack_shared_lock);
1162 	sz = NULL;
1163 	for (szp = &netstack_shared_zones; *szp != NULL;
1164 	    szp = &((*szp)->sz_next)) {
1165 		if ((*szp)->sz_zoneid == zoneid) {
1166 			sz = *szp;
1167 			break;
1168 		}
1169 	}
1170 	/* We must find it */
1171 	ASSERT(sz != NULL);
1172 	*szp = sz->sz_next;
1173 	sz->sz_next = NULL;
1174 
1175 	/*
1176 	 * Perform kstat_zone_remove for each existing shared stack kstat.
1177 	 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
1178 	 */
1179 	for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
1180 		kstat_zone_remove(sk->sk_kstat, zoneid);
1181 	}
1182 	mutex_exit(&netstack_shared_lock);
1183 
1184 	kmem_free(sz, sizeof (*sz));
1185 }
1186 
1187 static void
1188 netstack_shared_kstat_add(kstat_t *ks)
1189 {
1190 	struct shared_zone_list *sz;
1191 	struct shared_kstat_list *sk;
1192 
1193 	sk = (struct shared_kstat_list *)kmem_zalloc(sizeof (*sk), KM_SLEEP);
1194 	sk->sk_kstat = ks;
1195 
1196 	/* Insert in list */
1197 	mutex_enter(&netstack_shared_lock);
1198 	sk->sk_next = netstack_shared_kstats;
1199 	netstack_shared_kstats = sk;
1200 
1201 	/*
1202 	 * Perform kstat_zone_add for each existing shared stack zone.
1203 	 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
1204 	 */
1205 	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1206 		kstat_zone_add(ks, sz->sz_zoneid);
1207 	}
1208 	mutex_exit(&netstack_shared_lock);
1209 }
1210 
1211 static void
1212 netstack_shared_kstat_remove(kstat_t *ks)
1213 {
1214 	struct shared_zone_list *sz;
1215 	struct shared_kstat_list **skp, *sk;
1216 
1217 	/* Find in list */
1218 	mutex_enter(&netstack_shared_lock);
1219 	sk = NULL;
1220 	for (skp = &netstack_shared_kstats; *skp != NULL;
1221 	    skp = &((*skp)->sk_next)) {
1222 		if ((*skp)->sk_kstat == ks) {
1223 			sk = *skp;
1224 			break;
1225 		}
1226 	}
1227 	/* Must find it */
1228 	ASSERT(sk != NULL);
1229 	*skp = sk->sk_next;
1230 	sk->sk_next = NULL;
1231 
1232 	/*
1233 	 * Perform kstat_zone_remove for each existing shared stack kstat.
1234 	 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
1235 	 */
1236 	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1237 		kstat_zone_remove(ks, sz->sz_zoneid);
1238 	}
1239 	mutex_exit(&netstack_shared_lock);
1240 	kmem_free(sk, sizeof (*sk));
1241 }
1242 
1243 /*
1244  * If a zoneid is part of the shared zone, return true
1245  */
1246 static boolean_t
1247 netstack_find_shared_zoneid(zoneid_t zoneid)
1248 {
1249 	struct shared_zone_list *sz;
1250 
1251 	mutex_enter(&netstack_shared_lock);
1252 	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1253 		if (sz->sz_zoneid == zoneid) {
1254 			mutex_exit(&netstack_shared_lock);
1255 			return (B_TRUE);
1256 		}
1257 	}
1258 	mutex_exit(&netstack_shared_lock);
1259 	return (B_FALSE);
1260 }
1261 
1262 /*
1263  * Hide the fact that zoneids and netstackids are allocated from
1264  * the same space in the current implementation.
1265  * We currently do not check that the stackid/zoneids are valid, since there
1266  * is no need for that. But this should only be done for ids that are
1267  * valid.
1268  */
1269 zoneid_t
1270 netstackid_to_zoneid(netstackid_t stackid)
1271 {
1272 	return (stackid);
1273 }
1274 
1275 netstackid_t
1276 zoneid_to_netstackid(zoneid_t zoneid)
1277 {
1278 	if (netstack_find_shared_zoneid(zoneid))
1279 		return (GLOBAL_ZONEID);
1280 	else
1281 		return (zoneid);
1282 }
1283 
1284 /*
1285  * Simplistic support for walking all the handles.
1286  * Example usage:
1287  *	netstack_handle_t nh;
1288  *	netstack_t *ns;
1289  *
1290  *	netstack_next_init(&nh);
1291  *	while ((ns = netstack_next(&nh)) != NULL) {
1292  *		do something;
1293  *		netstack_rele(ns);
1294  *	}
1295  *	netstack_next_fini(&nh);
1296  */
1297 void
1298 netstack_next_init(netstack_handle_t *handle)
1299 {
1300 	*handle = 0;
1301 }
1302 
1303 /* ARGSUSED */
1304 void
1305 netstack_next_fini(netstack_handle_t *handle)
1306 {
1307 }
1308 
1309 netstack_t *
1310 netstack_next(netstack_handle_t *handle)
1311 {
1312 	netstack_t *ns;
1313 	int i, end;
1314 
1315 	end = *handle;
1316 	/* Walk skipping *handle number of instances */
1317 
1318 	/* Look if there is a matching stack instance */
1319 	mutex_enter(&netstack_g_lock);
1320 	ns = netstack_head;
1321 	for (i = 0; i < end; i++) {
1322 		if (ns == NULL)
1323 			break;
1324 		ns = ns->netstack_next;
1325 	}
1326 	/* skip those with that aren't really here */
1327 	while (ns != NULL) {
1328 		mutex_enter(&ns->netstack_lock);
1329 		if ((ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) == 0) {
1330 			mutex_exit(&ns->netstack_lock);
1331 			break;
1332 		}
1333 		mutex_exit(&ns->netstack_lock);
1334 		end++;
1335 		ns = ns->netstack_next;
1336 	}
1337 	if (ns != NULL) {
1338 		*handle = end + 1;
1339 		netstack_hold(ns);
1340 	}
1341 	mutex_exit(&netstack_g_lock);
1342 	return (ns);
1343 }
1344