xref: /titanic_51/usr/src/uts/common/os/netstack.c (revision bbaa8b60dd95d714741fc474adad3cf710ef4efd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/param.h>
28 #include <sys/sysmacros.h>
29 #include <sys/vm.h>
30 #include <sys/proc.h>
31 #include <sys/tuneable.h>
32 #include <sys/systm.h>
33 #include <sys/cmn_err.h>
34 #include <sys/debug.h>
35 #include <sys/sdt.h>
36 #include <sys/mutex.h>
37 #include <sys/bitmap.h>
38 #include <sys/atomic.h>
39 #include <sys/kobj.h>
40 #include <sys/disp.h>
41 #include <vm/seg_kmem.h>
42 #include <sys/zone.h>
43 #include <sys/netstack.h>
44 
45 /*
46  * What we use so that the zones framework can tell us about new zones,
47  * which we use to create new stacks.
48  */
49 static zone_key_t netstack_zone_key;
50 
51 static int	netstack_initialized = 0;
52 
53 /*
54  * Track the registered netstacks.
55  * The global lock protects
56  * - ns_reg
57  * - the list starting at netstack_head and following the netstack_next
58  *   pointers.
59  */
60 static kmutex_t netstack_g_lock;
61 
62 /*
 * Registry of netstacks with their create/shutdown/destroy functions.
64  */
65 static struct netstack_registry	ns_reg[NS_MAX];
66 
67 /*
68  * Global list of existing stacks.  We use this when a new zone with
69  * an exclusive IP instance is created.
70  *
71  * Note that in some cases a netstack_t needs to stay around after the zone
72  * has gone away. This is because there might be outstanding references
73  * (from TCP TIME_WAIT connections, IPsec state, etc). The netstack_t data
74  * structure and all the foo_stack_t's hanging off of it will be cleaned up
75  * when the last reference to it is dropped.
76  * However, the same zone might be rebooted. That is handled using the
77  * assumption that the zones framework picks a new zoneid each time a zone
78  * is (re)booted. We assert for that condition in netstack_zone_create().
79  * Thus the old netstack_t can take its time for things to time out.
80  */
81 static netstack_t *netstack_head;
82 
83 /*
84  * To support kstat_create_netstack() using kstat_zone_add we need
85  * to track both
86  *  - all zoneids that use the global/shared stack
87  *  - all kstats that have been added for the shared stack
88  */
89 struct shared_zone_list {
90 	struct shared_zone_list *sz_next;
91 	zoneid_t		sz_zoneid;
92 };
93 
94 struct shared_kstat_list {
95 	struct shared_kstat_list *sk_next;
96 	kstat_t			 *sk_kstat;
97 };
98 
99 static kmutex_t netstack_shared_lock;	/* protects the following two */
100 static struct shared_zone_list	*netstack_shared_zones;
101 static struct shared_kstat_list	*netstack_shared_kstats;
102 
103 static void	*netstack_zone_create(zoneid_t zoneid);
104 static void	netstack_zone_shutdown(zoneid_t zoneid, void *arg);
105 static void	netstack_zone_destroy(zoneid_t zoneid, void *arg);
106 
107 static void	netstack_shared_zone_add(zoneid_t zoneid);
108 static void	netstack_shared_zone_remove(zoneid_t zoneid);
109 static void	netstack_shared_kstat_add(kstat_t *ks);
110 static void	netstack_shared_kstat_remove(kstat_t *ks);
111 
112 typedef boolean_t applyfn_t(kmutex_t *, netstack_t *, int);
113 
114 static void	apply_all_netstacks(int, applyfn_t *);
115 static void	apply_all_modules(netstack_t *, applyfn_t *);
116 static void	apply_all_modules_reverse(netstack_t *, applyfn_t *);
117 static boolean_t netstack_apply_create(kmutex_t *, netstack_t *, int);
118 static boolean_t netstack_apply_shutdown(kmutex_t *, netstack_t *, int);
119 static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
120 static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
121 static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
122     kmutex_t *);
123 
/*
 * Called once during boot: initialize the framework locks and hook into
 * the zones framework so we are notified of zone create/shutdown/destroy
 * events, which drive the lifecycle of the per-zone netstacks.
 */
void
netstack_init(void)
{
	mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);

	/* Must be set before any zone callback can fire (see ASSERTs below) */
	netstack_initialized = 1;

	/*
	 * We want to be informed each time a zone is created or
	 * destroyed in the kernel, so we can maintain the
	 * stack instance information.
	 */
	zone_key_create(&netstack_zone_key, netstack_zone_create,
	    netstack_zone_shutdown, netstack_zone_destroy);
}
140 
141 /*
142  * Register a new module with the framework.
143  * This registers interest in changes to the set of netstacks.
144  * The createfn and destroyfn are required, but the shutdownfn can be
145  * NULL.
146  * Note that due to the current zsd implementation, when the create
147  * function is called the zone isn't fully present, thus functions
148  * like zone_find_by_* will fail, hence the create function can not
149  * use many zones kernel functions including zcmn_err().
150  */
void
netstack_register(int moduleid,
    void *(*module_create)(netstackid_t, netstack_t *),
    void (*module_shutdown)(netstackid_t, void *),
    void (*module_destroy)(netstackid_t, void *))
{
	netstack_t *ns;

	ASSERT(netstack_initialized);
	ASSERT(moduleid >= 0 && moduleid < NS_MAX);
	ASSERT(module_create != NULL);

	/*
	 * Make instances created after this point in time run the create
	 * callback.
	 * A moduleid may only be registered once at a time: nr_create and
	 * nr_flags must have been cleared by a prior netstack_unregister().
	 */
	mutex_enter(&netstack_g_lock);
	ASSERT(ns_reg[moduleid].nr_create == NULL);
	ASSERT(ns_reg[moduleid].nr_flags == 0);
	ns_reg[moduleid].nr_create = module_create;
	ns_reg[moduleid].nr_shutdown = module_shutdown;
	ns_reg[moduleid].nr_destroy = module_destroy;
	ns_reg[moduleid].nr_flags = NRF_REGISTERED;

	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_CREATE_NEEDED for each of those.
	 * netstacks which have been deleted will have NSS_CREATE_COMPLETED
	 * set, but check NSF_CLOSING to be sure.
	 */
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);
		if (!(ns->netstack_flags & NSF_CLOSING) &&
		    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
			nms->nms_flags |= NSS_CREATE_NEEDED;
			DTRACE_PROBE2(netstack__create__needed,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}
	mutex_exit(&netstack_g_lock);

	/*
	 * At this point in time a new instance can be created or an instance
	 * can be destroyed, or some other module can register or unregister.
	 * Make sure we either run all the create functions for this moduleid
	 * or we wait for any other creators for this moduleid.
	 */
	apply_all_netstacks(moduleid, netstack_apply_create);
}
203 
/*
 * Unregister a module: run the shutdown and destroy callbacks for the
 * module on every existing netstack, then clear the registration slot so
 * the moduleid can be reused (e.g., when the module is reloaded).
 */
void
netstack_unregister(int moduleid)
{
	netstack_t *ns;

	ASSERT(moduleid >= 0 && moduleid < NS_MAX);

	ASSERT(ns_reg[moduleid].nr_create != NULL);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);

	mutex_enter(&netstack_g_lock);
	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_SHUTDOWN_NEEDED and NSS_DESTROY_NEEDED for each of those.
	 * That ensures that when we return all the callbacks for existing
	 * instances have completed. And since we set NRF_DYING no new
	 * instances can use this module.
	 */
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);
		if (ns_reg[moduleid].nr_shutdown != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, moduleid);
		}
		if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) &&
		    ns_reg[moduleid].nr_destroy != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
			nms->nms_flags |= NSS_DESTROY_NEEDED;
			DTRACE_PROBE2(netstack__destroy__needed,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}
	/*
	 * Prevent any new netstack from calling the registered create
	 * function, while keeping the function pointers in place until the
	 * shutdown and destroy callbacks are complete.
	 */
	ns_reg[moduleid].nr_flags |= NRF_DYING;
	mutex_exit(&netstack_g_lock);

	/* Shutdown must complete on all stacks before any destroy runs */
	apply_all_netstacks(moduleid, netstack_apply_shutdown);
	apply_all_netstacks(moduleid, netstack_apply_destroy);

	/*
	 * Clear the nms_flags so that we can handle this module
	 * being loaded again.
	 * Also remove the registered functions.
	 */
	mutex_enter(&netstack_g_lock);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_DYING);
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);
		if (nms->nms_flags & NSS_DESTROY_COMPLETED) {
			nms->nms_flags = 0;
			DTRACE_PROBE2(netstack__destroy__done,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}

	ns_reg[moduleid].nr_create = NULL;
	ns_reg[moduleid].nr_shutdown = NULL;
	ns_reg[moduleid].nr_destroy = NULL;
	ns_reg[moduleid].nr_flags = 0;
	mutex_exit(&netstack_g_lock);
}
280 
281 /*
282  * Lookup and/or allocate a netstack for this zone.
283  */
static void *
netstack_zone_create(zoneid_t zoneid)
{
	netstackid_t stackid;
	netstack_t *ns;
	netstack_t **nsp;
	zone_t	*zone;
	int i;

	ASSERT(netstack_initialized);

	/* We run inside the zone create callback; zone can't disappear */
	zone = zone_find_by_id_nolock(zoneid);
	ASSERT(zone != NULL);

	if (zone->zone_flags & ZF_NET_EXCL) {
		stackid = zoneid;
	} else {
		/* Look for the stack instance for the global */
		stackid = GLOBAL_NETSTACKID;
	}

	/* Allocate even if it isn't needed; simplifies locking */
	ns = (netstack_t *)kmem_zalloc(sizeof (netstack_t), KM_SLEEP);

	/* Look if there is a matching stack instance */
	mutex_enter(&netstack_g_lock);
	for (nsp = &netstack_head; *nsp != NULL;
	    nsp = &((*nsp)->netstack_next)) {
		if ((*nsp)->netstack_stackid == stackid) {
			/*
			 * Should never find a pre-existing exclusive stack
			 */
			ASSERT(stackid == GLOBAL_NETSTACKID);
			kmem_free(ns, sizeof (netstack_t));
			ns = *nsp;
			mutex_enter(&ns->netstack_lock);
			ns->netstack_numzones++;
			mutex_exit(&ns->netstack_lock);
			mutex_exit(&netstack_g_lock);
			DTRACE_PROBE1(netstack__inc__numzones,
			    netstack_t *, ns);
			/* Record that we have a new shared stack zone */
			netstack_shared_zone_add(zoneid);
			zone->zone_netstack = ns;
			return (ns);
		}
	}
	/* Not found */
	mutex_init(&ns->netstack_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&ns->netstack_cv, NULL, CV_DEFAULT, NULL);
	/*
	 * NOTE(review): zoneid is stored as the stackid; this assumes that on
	 * this not-found path zoneid == stackid (exclusive stacks use the
	 * zoneid, and a shared-stack zone should always have found the
	 * existing global stack above) — confirm.
	 */
	ns->netstack_stackid = zoneid;
	ns->netstack_numzones = 1;
	ns->netstack_refcnt = 1; /* Decremented by netstack_zone_destroy */
	ns->netstack_flags = NSF_UNINIT;
	*nsp = ns;
	zone->zone_netstack = ns;

	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a CREATE running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing create callbacks to complete in moduleid order
	 */
	ns->netstack_flags |= NSF_ZONE_CREATE;

	/*
	 * Determine the set of module create functions that need to be
	 * called before we drop the lock.
	 * Set NSS_CREATE_NEEDED for each of those.
	 * Skip any with NRF_DYING set, since those are in the process of
	 * going away, by checking for flags being exactly NRF_REGISTERED.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		cv_init(&nms->nms_cv, NULL, CV_DEFAULT, NULL);

		if ((ns_reg[i].nr_flags == NRF_REGISTERED) &&
		    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
			nms->nms_flags |= NSS_CREATE_NEEDED;
			DTRACE_PROBE2(netstack__create__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	/* Run the create callbacks for all registered modules */
	apply_all_modules(ns, netstack_apply_create);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ns->netstack_flags &= ~NSF_UNINIT;
	ASSERT(ns->netstack_flags & NSF_ZONE_CREATE);
	ns->netstack_flags &= ~NSF_ZONE_CREATE;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);

	return (ns);
}
383 
/*
 * Zone shutdown callback: run the module shutdown functions, but only if
 * this is the last zone using the stack instance.
 */
/* ARGSUSED */
static void
netstack_zone_shutdown(zoneid_t zoneid, void *arg)
{
	netstack_t *ns = (netstack_t *)arg;
	int i;

	ASSERT(arg != NULL);

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_numzones > 0);
	if (ns->netstack_numzones != 1) {
		/* Stack instance being used by other zone */
		mutex_exit(&ns->netstack_lock);
		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
		return;
	}
	mutex_exit(&ns->netstack_lock);

	mutex_enter(&netstack_g_lock);
	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a SHUTDOWN running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing shutdown callbacks to complete in moduleid order
	 */
	ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
	ns->netstack_flags |= NSF_ZONE_SHUTDOWN;

	/*
	 * Determine the set of registered modules that need their shutdown
	 * function called, before we drop the lock.
	 * Set NSS_SHUTDOWN_NEEDED for each of those.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_shutdown != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	/*
	 * Call the shutdown function for all registered modules for this
	 * netstack.
	 */
	apply_all_modules_reverse(ns, netstack_apply_shutdown);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_flags & NSF_ZONE_SHUTDOWN);
	ns->netstack_flags &= ~NSF_ZONE_SHUTDOWN;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);
}
445 
446 /*
447  * Common routine to release a zone.
448  * If this was the last zone using the stack instance then prepare to
449  * have the refcnt dropping to zero free the zone.
450  */
/* ARGSUSED */
static void
netstack_zone_destroy(zoneid_t zoneid, void *arg)
{
	netstack_t *ns = (netstack_t *)arg;

	ASSERT(arg != NULL);

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_numzones > 0);
	ns->netstack_numzones--;
	if (ns->netstack_numzones != 0) {
		/* Stack instance being used by other zone */
		mutex_exit(&ns->netstack_lock);
		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
		/* Record that a shared stack zone has gone away */
		netstack_shared_zone_remove(zoneid);
		return;
	}
	/*
	 * Set CLOSING so that netstack_find_by will not find it.
	 */
	ns->netstack_flags |= NSF_CLOSING;
	mutex_exit(&ns->netstack_lock);
	DTRACE_PROBE1(netstack__dec__numzones, netstack_t *, ns);
	/* No other thread can call zone_destroy for this stack */

	/*
	 * Decrease refcnt to account for the one set in netstack_zone_create()
	 */
	netstack_rele(ns);
}
483 
484 /*
485  * Called when the reference count drops to zero.
486  * Call the destroy functions for each registered module.
487  */
static void
netstack_stack_inactive(netstack_t *ns)
{
	int i;

	mutex_enter(&netstack_g_lock);
	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a DESTROY running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing destroy callbacks to complete in reverse moduleid order
	 */
	ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
	ns->netstack_flags |= NSF_ZONE_DESTROY;
	/*
	 * If the shutdown callback wasn't called earlier (e.g., if this is
	 * a netstack shared between multiple zones), then we schedule it now.
	 *
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_DESTROY_NEEDED for each of those. That
	 * ensures that when we return all the callbacks for existing
	 * instances have completed.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		/* Schedule any shutdown callback not already run/scheduled */
		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_shutdown != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, i);
		}

		/* Schedule the destroy callback for each created module */
		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_destroy != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
			nms->nms_flags |= NSS_DESTROY_NEEDED;
			DTRACE_PROBE2(netstack__destroy__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	/*
	 * Call the shutdown and destroy functions for all registered modules
	 * for this netstack.
	 *
	 * Since there are some ordering dependencies between the modules we
	 * tear them down in the reverse order of what was used to create them.
	 *
	 * Since a netstack_t is never reused (when a zone is rebooted it gets
	 * a new zoneid == netstackid i.e. a new netstack_t is allocated) we
	 * leave nms_flags the way it is i.e. with NSS_DESTROY_COMPLETED set.
	 * That is different than in the netstack_unregister() case.
	 */
	apply_all_modules_reverse(ns, netstack_apply_shutdown);
	apply_all_modules_reverse(ns, netstack_apply_destroy);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_flags & NSF_ZONE_DESTROY);
	ns->netstack_flags &= ~NSF_ZONE_DESTROY;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);
}
557 
558 /*
559  * Apply a function to all netstacks for a particular moduleid.
560  *
561  * If there is any zone activity (due to a zone being created, shutdown,
562  * or destroyed) we wait for that to complete before we proceed. This ensures
563  * that the moduleids are processed in order when a zone is created or
564  * destroyed.
565  *
566  * The applyfn has to drop netstack_g_lock if it does some work.
567  * In that case we don't follow netstack_next,
568  * even if it is possible to do so without any hazards. This is
569  * because we want the design to allow for the list of netstacks threaded
570  * by netstack_next to change in any arbitrary way during the time the
571  * lock was dropped.
572  *
573  * It is safe to restart the loop at netstack_head since the applyfn
574  * changes netstack_m_state as it processes things, so a subsequent
575  * pass through will have no effect in applyfn, hence the loop will terminate
576  * in at worst O(N^2).
577  */
578 static void
579 apply_all_netstacks(int moduleid, applyfn_t *applyfn)
580 {
581 	netstack_t *ns;
582 
583 	mutex_enter(&netstack_g_lock);
584 	ns = netstack_head;
585 	while (ns != NULL) {
586 		if (wait_for_zone_creator(ns, &netstack_g_lock)) {
587 			/* Lock dropped - restart at head */
588 			ns = netstack_head;
589 		} else if ((applyfn)(&netstack_g_lock, ns, moduleid)) {
590 			/* Lock dropped - restart at head */
591 			ns = netstack_head;
592 		} else {
593 			ns = ns->netstack_next;
594 		}
595 	}
596 	mutex_exit(&netstack_g_lock);
597 }
598 
599 /*
600  * Apply a function to all moduleids for a particular netstack.
601  *
602  * Since the netstack linkage doesn't matter in this case we can
603  * ignore whether the function drops the lock.
604  */
605 static void
606 apply_all_modules(netstack_t *ns, applyfn_t *applyfn)
607 {
608 	int i;
609 
610 	mutex_enter(&netstack_g_lock);
611 	for (i = 0; i < NS_MAX; i++) {
612 		/*
613 		 * We don't care whether the lock was dropped
614 		 * since we are not iterating over netstack_head.
615 		 */
616 		(void) (applyfn)(&netstack_g_lock, ns, i);
617 	}
618 	mutex_exit(&netstack_g_lock);
619 }
620 
621 /* Like the above but in reverse moduleid order */
622 static void
623 apply_all_modules_reverse(netstack_t *ns, applyfn_t *applyfn)
624 {
625 	int i;
626 
627 	mutex_enter(&netstack_g_lock);
628 	for (i = NS_MAX-1; i >= 0; i--) {
629 		/*
630 		 * We don't care whether the lock was dropped
631 		 * since we are not iterating over netstack_head.
632 		 */
633 		(void) (applyfn)(&netstack_g_lock, ns, i);
634 	}
635 	mutex_exit(&netstack_g_lock);
636 }
637 
638 /*
639  * Call the create function for the ns and moduleid if CREATE_NEEDED
640  * is set.
641  * If some other thread gets here first and sets *_INPROGRESS, then
642  * we wait for that thread to complete so that we can ensure that
643  * all the callbacks are done when we've looped over all netstacks/moduleids.
644  *
645  * When we call the create function, we temporarily drop the netstack_lock
646  * held by the caller, and return true to tell the caller it needs to
 * re-evaluate the state.
648  */
static boolean_t
netstack_apply_create(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	void *result;
	netstackid_t stackid;
	nm_state_t *nms = &ns->netstack_m_state[moduleid];
	boolean_t dropped = B_FALSE;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);

	if (wait_for_nms_inprogress(ns, nms, lockp))
		dropped = B_TRUE;

	if (nms->nms_flags & NSS_CREATE_NEEDED) {
		/* Claim the work: NEEDED -> INPROGRESS under netstack_lock */
		nms->nms_flags &= ~NSS_CREATE_NEEDED;
		nms->nms_flags |= NSS_CREATE_INPROGRESS;
		DTRACE_PROBE2(netstack__create__inprogress,
		    netstack_t *, ns, int, moduleid);
		/* Drop both locks so the callback can block */
		mutex_exit(&ns->netstack_lock);
		mutex_exit(lockp);
		dropped = B_TRUE;

		ASSERT(ns_reg[moduleid].nr_create != NULL);
		stackid = ns->netstack_stackid;
		DTRACE_PROBE2(netstack__create__start,
		    netstackid_t, stackid,
		    netstack_t *, ns);
		result = (ns_reg[moduleid].nr_create)(stackid, ns);
		DTRACE_PROBE2(netstack__create__end,
		    void *, result, netstack_t *, ns);

		ASSERT(result != NULL);
		mutex_enter(lockp);
		mutex_enter(&ns->netstack_lock);
		ns->netstack_modules[moduleid] = result;
		nms->nms_flags &= ~NSS_CREATE_INPROGRESS;
		nms->nms_flags |= NSS_CREATE_COMPLETED;
		/* Wake any thread blocked in wait_for_nms_inprogress() */
		cv_broadcast(&nms->nms_cv);
		DTRACE_PROBE2(netstack__create__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	} else {
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	}
}
697 
698 /*
699  * Call the shutdown function for the ns and moduleid if SHUTDOWN_NEEDED
700  * is set.
701  * If some other thread gets here first and sets *_INPROGRESS, then
702  * we wait for that thread to complete so that we can ensure that
703  * all the callbacks are done when we've looped over all netstacks/moduleids.
704  *
705  * When we call the shutdown function, we temporarily drop the netstack_lock
706  * held by the caller, and return true to tell the caller it needs to
 * re-evaluate the state.
708  */
static boolean_t
netstack_apply_shutdown(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	netstackid_t stackid;
	void * netstack_module;
	nm_state_t *nms = &ns->netstack_m_state[moduleid];
	boolean_t dropped = B_FALSE;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);

	if (wait_for_nms_inprogress(ns, nms, lockp))
		dropped = B_TRUE;

	if (nms->nms_flags & NSS_SHUTDOWN_NEEDED) {
		/* Claim the work: NEEDED -> INPROGRESS under netstack_lock */
		nms->nms_flags &= ~NSS_SHUTDOWN_NEEDED;
		nms->nms_flags |= NSS_SHUTDOWN_INPROGRESS;
		DTRACE_PROBE2(netstack__shutdown__inprogress,
		    netstack_t *, ns, int, moduleid);
		/* Drop both locks so the callback can block */
		mutex_exit(&ns->netstack_lock);
		mutex_exit(lockp);
		dropped = B_TRUE;

		ASSERT(ns_reg[moduleid].nr_shutdown != NULL);
		stackid = ns->netstack_stackid;
		netstack_module = ns->netstack_modules[moduleid];
		DTRACE_PROBE2(netstack__shutdown__start,
		    netstackid_t, stackid,
		    void *, netstack_module);
		(ns_reg[moduleid].nr_shutdown)(stackid, netstack_module);
		DTRACE_PROBE1(netstack__shutdown__end,
		    netstack_t *, ns);

		mutex_enter(lockp);
		mutex_enter(&ns->netstack_lock);
		nms->nms_flags &= ~NSS_SHUTDOWN_INPROGRESS;
		nms->nms_flags |= NSS_SHUTDOWN_COMPLETED;
		/* Wake any thread blocked in wait_for_nms_inprogress() */
		cv_broadcast(&nms->nms_cv);
		DTRACE_PROBE2(netstack__shutdown__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	} else {
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	}
}
756 
757 /*
758  * Call the destroy function for the ns and moduleid if DESTROY_NEEDED
759  * is set.
760  * If some other thread gets here first and sets *_INPROGRESS, then
761  * we wait for that thread to complete so that we can ensure that
762  * all the callbacks are done when we've looped over all netstacks/moduleids.
763  *
764  * When we call the destroy function, we temporarily drop the netstack_lock
765  * held by the caller, and return true to tell the caller it needs to
 * re-evaluate the state.
767  */
static boolean_t
netstack_apply_destroy(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	netstackid_t stackid;
	void * netstack_module;
	nm_state_t *nms = &ns->netstack_m_state[moduleid];
	boolean_t dropped = B_FALSE;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);

	if (wait_for_nms_inprogress(ns, nms, lockp))
		dropped = B_TRUE;

	if (nms->nms_flags & NSS_DESTROY_NEEDED) {
		/* Claim the work: NEEDED -> INPROGRESS under netstack_lock */
		nms->nms_flags &= ~NSS_DESTROY_NEEDED;
		nms->nms_flags |= NSS_DESTROY_INPROGRESS;
		DTRACE_PROBE2(netstack__destroy__inprogress,
		    netstack_t *, ns, int, moduleid);
		/* Drop both locks so the callback can block */
		mutex_exit(&ns->netstack_lock);
		mutex_exit(lockp);
		dropped = B_TRUE;

		ASSERT(ns_reg[moduleid].nr_destroy != NULL);
		stackid = ns->netstack_stackid;
		netstack_module = ns->netstack_modules[moduleid];
		DTRACE_PROBE2(netstack__destroy__start,
		    netstackid_t, stackid,
		    void *, netstack_module);
		(ns_reg[moduleid].nr_destroy)(stackid, netstack_module);
		DTRACE_PROBE1(netstack__destroy__end,
		    netstack_t *, ns);

		mutex_enter(lockp);
		mutex_enter(&ns->netstack_lock);
		/* The module's per-stack data is gone; clear the pointer */
		ns->netstack_modules[moduleid] = NULL;
		nms->nms_flags &= ~NSS_DESTROY_INPROGRESS;
		nms->nms_flags |= NSS_DESTROY_COMPLETED;
		/* Wake any thread blocked in wait_for_nms_inprogress() */
		cv_broadcast(&nms->nms_cv);
		DTRACE_PROBE2(netstack__destroy__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	} else {
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	}
}
816 
817 /*
 * If somebody is creating the netstack (due to a new zone being created)
819  * then we wait for them to complete. This ensures that any additional
820  * netstack_register() doesn't cause the create functions to run out of
821  * order.
822  * Note that we do not need such a global wait in the case of the shutdown
823  * and destroy callbacks, since in that case it is sufficient for both
824  * threads to set NEEDED and wait for INPROGRESS to ensure ordering.
825  * Returns true if lockp was temporarily dropped while waiting.
826  */
/*
 * lockp may be NULL when the caller holds no lock other than the ones
 * acquired here; when non-NULL it is dropped across the cv_wait and
 * reacquired before ns->netstack_lock to preserve lock ordering.
 */
static boolean_t
wait_for_zone_creator(netstack_t *ns, kmutex_t *lockp)
{
	boolean_t dropped = B_FALSE;

	mutex_enter(&ns->netstack_lock);
	while (ns->netstack_flags & NSF_ZONE_CREATE) {
		DTRACE_PROBE1(netstack__wait__zone__inprogress,
		    netstack_t *, ns);
		if (lockp != NULL) {
			dropped = B_TRUE;
			mutex_exit(lockp);
		}
		/* Woken by the cv_broadcast when NSF_ZONE_CREATE clears */
		cv_wait(&ns->netstack_cv, &ns->netstack_lock);
		if (lockp != NULL) {
			/* First drop netstack_lock to preserve order */
			mutex_exit(&ns->netstack_lock);
			mutex_enter(lockp);
			mutex_enter(&ns->netstack_lock);
		}
	}
	mutex_exit(&ns->netstack_lock);
	return (dropped);
}
851 
852 /*
853  * Wait for any INPROGRESS flag to be cleared for the netstack/moduleid
854  * combination.
855  * Returns true if lockp was temporarily dropped while waiting.
856  */
/*
 * The caller must hold ns->netstack_lock (cv_wait sleeps on it); it is
 * still held on return.  lockp may be NULL; when non-NULL it is dropped
 * across the cv_wait and reacquired before ns->netstack_lock to preserve
 * lock ordering.
 */
static boolean_t
wait_for_nms_inprogress(netstack_t *ns, nm_state_t *nms, kmutex_t *lockp)
{
	boolean_t dropped = B_FALSE;

	while (nms->nms_flags & NSS_ALL_INPROGRESS) {
		DTRACE_PROBE2(netstack__wait__nms__inprogress,
		    netstack_t *, ns, nm_state_t *, nms);
		if (lockp != NULL) {
			dropped = B_TRUE;
			mutex_exit(lockp);
		}
		/* Woken by the cv_broadcast when the INPROGRESS bit clears */
		cv_wait(&nms->nms_cv, &ns->netstack_lock);
		if (lockp != NULL) {
			/* First drop netstack_lock to preserve order */
			mutex_exit(&ns->netstack_lock);
			mutex_enter(lockp);
			mutex_enter(&ns->netstack_lock);
		}
	}
	return (dropped);
}
879 
880 /*
881  * Get the stack instance used in caller's zone.
882  * Increases the reference count, caller must do a netstack_rele.
883  * It can't be called after zone_destroy() has started.
884  */
885 netstack_t *
886 netstack_get_current(void)
887 {
888 	netstack_t *ns;
889 
890 	ns = curproc->p_zone->zone_netstack;
891 	ASSERT(ns != NULL);
892 	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
893 		return (NULL);
894 
895 	netstack_hold(ns);
896 
897 	return (ns);
898 }
899 
900 /*
901  * Find a stack instance given the cred.
902  * This is used by the modules to potentially allow for a future when
903  * something other than the zoneid is used to determine the stack.
904  */
905 netstack_t *
906 netstack_find_by_cred(const cred_t *cr)
907 {
908 	zoneid_t zoneid = crgetzoneid(cr);
909 
910 	/* Handle the case when cr_zone is NULL */
911 	if (zoneid == (zoneid_t)-1)
912 		zoneid = GLOBAL_ZONEID;
913 
914 	/* For performance ... */
915 	if (curproc->p_zone->zone_id == zoneid)
916 		return (netstack_get_current());
917 	else
918 		return (netstack_find_by_zoneid(zoneid));
919 }
920 
921 /*
922  * Find a stack instance given the zoneid.
923  * Increases the reference count if found; caller must do a
924  * netstack_rele().
925  *
926  * If there is no exact match then assume the shared stack instance
927  * matches.
928  *
929  * Skip the unitialized ones.
930  */
931 netstack_t *
932 netstack_find_by_zoneid(zoneid_t zoneid)
933 {
934 	netstack_t *ns;
935 	zone_t *zone;
936 
937 	zone = zone_find_by_id(zoneid);
938 
939 	if (zone == NULL)
940 		return (NULL);
941 
942 	ns = zone->zone_netstack;
943 	ASSERT(ns != NULL);
944 	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
945 		ns = NULL;
946 	else
947 		netstack_hold(ns);
948 
949 	zone_rele(zone);
950 	return (ns);
951 }
952 
953 /*
954  * Find a stack instance given the zoneid. Can only be called from
955  * the create callback. See the comments in zone_find_by_id_nolock why
956  * that limitation exists.
957  *
958  * Increases the reference count if found; caller must do a
959  * netstack_rele().
960  *
961  * If there is no exact match then assume the shared stack instance
962  * matches.
963  *
964  * Skip the unitialized ones.
965  */
966 netstack_t *
967 netstack_find_by_zoneid_nolock(zoneid_t zoneid)
968 {
969 	netstack_t *ns;
970 	zone_t *zone;
971 
972 	zone = zone_find_by_id_nolock(zoneid);
973 
974 	if (zone == NULL)
975 		return (NULL);
976 
977 	ns = zone->zone_netstack;
978 	ASSERT(ns != NULL);
979 
980 	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
981 		ns = NULL;
982 	else
983 		netstack_hold(ns);
984 
985 	/* zone_find_by_id_nolock does not have a hold on the zone */
986 	return (ns);
987 }
988 
989 /*
990  * Find a stack instance given the stackid with exact match?
991  * Increases the reference count if found; caller must do a
992  * netstack_rele().
993  *
994  * Skip the unitialized ones.
995  */
996 netstack_t *
997 netstack_find_by_stackid(netstackid_t stackid)
998 {
999 	netstack_t *ns;
1000 
1001 	mutex_enter(&netstack_g_lock);
1002 	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
1003 		mutex_enter(&ns->netstack_lock);
1004 		if (ns->netstack_stackid == stackid &&
1005 		    !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
1006 			mutex_exit(&ns->netstack_lock);
1007 			netstack_hold(ns);
1008 			mutex_exit(&netstack_g_lock);
1009 			return (ns);
1010 		}
1011 		mutex_exit(&ns->netstack_lock);
1012 	}
1013 	mutex_exit(&netstack_g_lock);
1014 	return (NULL);
1015 }
1016 
/*
 * Drop a reference on a netstack instance. When the last reference is
 * dropped and no zones still use the instance, run the destroy
 * callbacks, unlink the instance from the global list, and free it.
 */
void
netstack_rele(netstack_t *ns)
{
	netstack_t **nsp;
	boolean_t found;
	int refcnt, numzones;
	int i;

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_refcnt > 0);
	ns->netstack_refcnt--;
	/*
	 * As we drop the lock additional netstack_rele()s can come in
	 * and decrement the refcnt to zero and free the netstack_t.
	 * Store pointers in local variables and if we were not the last
	 * then don't reference the netstack_t after that.
	 */
	refcnt = ns->netstack_refcnt;
	numzones = ns->netstack_numzones;
	DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
	mutex_exit(&ns->netstack_lock);

	if (refcnt == 0 && numzones == 0) {
		/*
		 * Time to call the destroy functions and free up
		 * the structure
		 */
		netstack_stack_inactive(ns);

		/* Make sure nothing increased the references */
		ASSERT(ns->netstack_refcnt == 0);
		ASSERT(ns->netstack_numzones == 0);

		/* Finally remove from list of netstacks */
		mutex_enter(&netstack_g_lock);
		found = B_FALSE;
		for (nsp = &netstack_head; *nsp != NULL;
		    nsp = &(*nsp)->netstack_next) {
			if (*nsp == ns) {
				/* Unlink by redirecting the predecessor */
				*nsp = ns->netstack_next;
				ns->netstack_next = NULL;
				found = B_TRUE;
				break;
			}
		}
		ASSERT(found);
		mutex_exit(&netstack_g_lock);

		/* Make sure nothing increased the references */
		ASSERT(ns->netstack_refcnt == 0);
		ASSERT(ns->netstack_numzones == 0);

		ASSERT(ns->netstack_flags & NSF_CLOSING);

		/* Tear down the per-module condition variables */
		for (i = 0; i < NS_MAX; i++) {
			nm_state_t *nms = &ns->netstack_m_state[i];

			cv_destroy(&nms->nms_cv);
		}
		mutex_destroy(&ns->netstack_lock);
		cv_destroy(&ns->netstack_cv);
		kmem_free(ns, sizeof (*ns));
	}
}
1081 
1082 void
1083 netstack_hold(netstack_t *ns)
1084 {
1085 	mutex_enter(&ns->netstack_lock);
1086 	ns->netstack_refcnt++;
1087 	ASSERT(ns->netstack_refcnt > 0);
1088 	mutex_exit(&ns->netstack_lock);
1089 	DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
1090 }
1091 
1092 /*
1093  * To support kstat_create_netstack() using kstat_zone_add we need
1094  * to track both
1095  *  - all zoneids that use the global/shared stack
1096  *  - all kstats that have been added for the shared stack
1097  */
1098 kstat_t *
1099 kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
1100     char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
1101     netstackid_t ks_netstackid)
1102 {
1103 	kstat_t *ks;
1104 
1105 	if (ks_netstackid == GLOBAL_NETSTACKID) {
1106 		ks = kstat_create_zone(ks_module, ks_instance, ks_name,
1107 		    ks_class, ks_type, ks_ndata, ks_flags, GLOBAL_ZONEID);
1108 		if (ks != NULL)
1109 			netstack_shared_kstat_add(ks);
1110 		return (ks);
1111 	} else {
1112 		zoneid_t zoneid = ks_netstackid;
1113 
1114 		return (kstat_create_zone(ks_module, ks_instance, ks_name,
1115 		    ks_class, ks_type, ks_ndata, ks_flags, zoneid));
1116 	}
1117 }
1118 
1119 void
1120 kstat_delete_netstack(kstat_t *ks, netstackid_t ks_netstackid)
1121 {
1122 	if (ks_netstackid == GLOBAL_NETSTACKID) {
1123 		netstack_shared_kstat_remove(ks);
1124 	}
1125 	kstat_delete(ks);
1126 }
1127 
1128 static void
1129 netstack_shared_zone_add(zoneid_t zoneid)
1130 {
1131 	struct shared_zone_list *sz;
1132 	struct shared_kstat_list *sk;
1133 
1134 	sz = (struct shared_zone_list *)kmem_zalloc(sizeof (*sz), KM_SLEEP);
1135 	sz->sz_zoneid = zoneid;
1136 
1137 	/* Insert in list */
1138 	mutex_enter(&netstack_shared_lock);
1139 	sz->sz_next = netstack_shared_zones;
1140 	netstack_shared_zones = sz;
1141 
1142 	/*
1143 	 * Perform kstat_zone_add for each existing shared stack kstat.
1144 	 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
1145 	 */
1146 	for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
1147 		kstat_zone_add(sk->sk_kstat, zoneid);
1148 	}
1149 	mutex_exit(&netstack_shared_lock);
1150 }
1151 
1152 static void
1153 netstack_shared_zone_remove(zoneid_t zoneid)
1154 {
1155 	struct shared_zone_list **szp, *sz;
1156 	struct shared_kstat_list *sk;
1157 
1158 	/* Find in list */
1159 	mutex_enter(&netstack_shared_lock);
1160 	sz = NULL;
1161 	for (szp = &netstack_shared_zones; *szp != NULL;
1162 	    szp = &((*szp)->sz_next)) {
1163 		if ((*szp)->sz_zoneid == zoneid) {
1164 			sz = *szp;
1165 			break;
1166 		}
1167 	}
1168 	/* We must find it */
1169 	ASSERT(sz != NULL);
1170 	*szp = sz->sz_next;
1171 	sz->sz_next = NULL;
1172 
1173 	/*
1174 	 * Perform kstat_zone_remove for each existing shared stack kstat.
1175 	 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
1176 	 */
1177 	for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
1178 		kstat_zone_remove(sk->sk_kstat, zoneid);
1179 	}
1180 	mutex_exit(&netstack_shared_lock);
1181 
1182 	kmem_free(sz, sizeof (*sz));
1183 }
1184 
1185 static void
1186 netstack_shared_kstat_add(kstat_t *ks)
1187 {
1188 	struct shared_zone_list *sz;
1189 	struct shared_kstat_list *sk;
1190 
1191 	sk = (struct shared_kstat_list *)kmem_zalloc(sizeof (*sk), KM_SLEEP);
1192 	sk->sk_kstat = ks;
1193 
1194 	/* Insert in list */
1195 	mutex_enter(&netstack_shared_lock);
1196 	sk->sk_next = netstack_shared_kstats;
1197 	netstack_shared_kstats = sk;
1198 
1199 	/*
1200 	 * Perform kstat_zone_add for each existing shared stack zone.
1201 	 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
1202 	 */
1203 	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1204 		kstat_zone_add(ks, sz->sz_zoneid);
1205 	}
1206 	mutex_exit(&netstack_shared_lock);
1207 }
1208 
1209 static void
1210 netstack_shared_kstat_remove(kstat_t *ks)
1211 {
1212 	struct shared_zone_list *sz;
1213 	struct shared_kstat_list **skp, *sk;
1214 
1215 	/* Find in list */
1216 	mutex_enter(&netstack_shared_lock);
1217 	sk = NULL;
1218 	for (skp = &netstack_shared_kstats; *skp != NULL;
1219 	    skp = &((*skp)->sk_next)) {
1220 		if ((*skp)->sk_kstat == ks) {
1221 			sk = *skp;
1222 			break;
1223 		}
1224 	}
1225 	/* Must find it */
1226 	ASSERT(sk != NULL);
1227 	*skp = sk->sk_next;
1228 	sk->sk_next = NULL;
1229 
1230 	/*
1231 	 * Perform kstat_zone_remove for each existing shared stack kstat.
1232 	 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
1233 	 */
1234 	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1235 		kstat_zone_remove(ks, sz->sz_zoneid);
1236 	}
1237 	mutex_exit(&netstack_shared_lock);
1238 	kmem_free(sk, sizeof (*sk));
1239 }
1240 
1241 /*
1242  * If a zoneid is part of the shared zone, return true
1243  */
1244 static boolean_t
1245 netstack_find_shared_zoneid(zoneid_t zoneid)
1246 {
1247 	struct shared_zone_list *sz;
1248 
1249 	mutex_enter(&netstack_shared_lock);
1250 	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1251 		if (sz->sz_zoneid == zoneid) {
1252 			mutex_exit(&netstack_shared_lock);
1253 			return (B_TRUE);
1254 		}
1255 	}
1256 	mutex_exit(&netstack_shared_lock);
1257 	return (B_FALSE);
1258 }
1259 
1260 /*
1261  * Hide the fact that zoneids and netstackids are allocated from
1262  * the same space in the current implementation.
1263  * We currently do not check that the stackid/zoneids are valid, since there
1264  * is no need for that. But this should only be done for ids that are
1265  * valid.
1266  */
1267 zoneid_t
1268 netstackid_to_zoneid(netstackid_t stackid)
1269 {
1270 	return (stackid);
1271 }
1272 
1273 netstackid_t
1274 zoneid_to_netstackid(zoneid_t zoneid)
1275 {
1276 	if (netstack_find_shared_zoneid(zoneid))
1277 		return (GLOBAL_ZONEID);
1278 	else
1279 		return (zoneid);
1280 }
1281 
1282 zoneid_t
1283 netstack_get_zoneid(netstack_t *ns)
1284 {
1285 	return (netstackid_to_zoneid(ns->netstack_stackid));
1286 }
1287 
1288 /*
1289  * Simplistic support for walking all the handles.
1290  * Example usage:
1291  *	netstack_handle_t nh;
1292  *	netstack_t *ns;
1293  *
1294  *	netstack_next_init(&nh);
1295  *	while ((ns = netstack_next(&nh)) != NULL) {
1296  *		do something;
1297  *		netstack_rele(ns);
1298  *	}
1299  *	netstack_next_fini(&nh);
1300  */
1301 void
1302 netstack_next_init(netstack_handle_t *handle)
1303 {
1304 	*handle = 0;
1305 }
1306 
1307 /* ARGSUSED */
1308 void
1309 netstack_next_fini(netstack_handle_t *handle)
1310 {
1311 }
1312 
/*
 * Return the next netstack instance for the walk recorded in *handle,
 * with a reference held (the caller must netstack_rele() it), or NULL
 * when the list is exhausted. The handle counts list positions already
 * consumed. NOTE(review): since netstack_g_lock is dropped between
 * calls, concurrent list changes can perturb a position-based walk —
 * acceptable for this "simplistic" walker, per the comment above.
 */
netstack_t *
netstack_next(netstack_handle_t *handle)
{
	netstack_t *ns;
	int i, end;

	end = *handle;
	/* Walk skipping *handle number of instances */

	/* Look if there is a matching stack instance */
	mutex_enter(&netstack_g_lock);
	ns = netstack_head;
	for (i = 0; i < end; i++) {
		if (ns == NULL)
			break;
		ns = ns->netstack_next;
	}
	/* skip those that aren't fully here (uninitialized or closing) */
	while (ns != NULL) {
		mutex_enter(&ns->netstack_lock);
		if ((ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) == 0) {
			mutex_exit(&ns->netstack_lock);
			break;
		}
		mutex_exit(&ns->netstack_lock);
		/* count the skipped instance so the handle stays in step */
		end++;
		ns = ns->netstack_next;
	}
	if (ns != NULL) {
		/* record the position just past this instance */
		*handle = end + 1;
		netstack_hold(ns);
	}
	mutex_exit(&netstack_g_lock);
	return (ns);
}
1348