/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/vm.h>
#include <sys/proc.h>
#include <sys/tuneable.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/sdt.h>
#include <sys/mutex.h>
#include <sys/bitmap.h>
#include <sys/atomic.h>
#include <sys/kobj.h>
#include <sys/disp.h>
#include <vm/seg_kmem.h>
#include <sys/zone.h>
#include <sys/netstack.h>

/*
 * The key used to register with the zones framework, so that it can tell
 * us about new zones; we use those notifications to create new stacks.
 */
static zone_key_t netstack_zone_key;

static int	netstack_initialized = 0;

/*
 * Track the registered netstacks.
 * The global lock protects
 * - ns_reg
 * - the list starting at netstack_head and following the netstack_next
 *   pointers.
 */
static kmutex_t netstack_g_lock;

/*
 * Registry of netstacks with their create/shutdown/destroy functions.
 */
static struct netstack_registry	ns_reg[NS_MAX];
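
/*
 * Per-module state (a descriptive sketch; the authoritative flag
 * definitions are in <sys/netstack.h>): each (netstack, module) pair has
 * an nm_state_t whose nms_flags step through three independent sequences:
 *
 *	NSS_CREATE_NEEDED -> NSS_CREATE_INPROGRESS -> NSS_CREATE_COMPLETED
 *	NSS_SHUTDOWN_NEEDED -> NSS_SHUTDOWN_INPROGRESS -> NSS_SHUTDOWN_COMPLETED
 *	NSS_DESTROY_NEEDED -> NSS_DESTROY_INPROGRESS -> NSS_DESTROY_COMPLETED
 *
 * The *_NEEDED flags are set while holding the locks, the callback itself
 * runs with the locks dropped, and each *_COMPLETED transition does a
 * cv_broadcast() on nms_cv to wake any waiters.
 */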

/*
 * Global list of existing stacks.  We use this when a new zone with
 * an exclusive IP instance is created.
 *
 * Note that in some cases a netstack_t needs to stay around after the zone
 * has gone away. This is because there might be outstanding references
 * (from TCP TIME_WAIT connections, IPsec state, etc). The netstack_t data
 * structure and all the foo_stack_t's hanging off of it will be cleaned up
 * when the last reference to it is dropped.
 * However, the same zone might be rebooted. That is handled using the
 * assumption that the zones framework picks a new zoneid each time a zone
 * is (re)booted. We assert for that condition in netstack_zone_create().
 * Thus the old netstack_t can take its time for things to time out.
 */
static netstack_t *netstack_head;

/*
 * To support kstat_create_netstack() using kstat_zone_add we need
 * to track both
 *  - all zoneids that use the global/shared stack
 *  - all kstats that have been added for the shared stack
 */
struct shared_zone_list {
	struct shared_zone_list *sz_next;
	zoneid_t		sz_zoneid;
};

struct shared_kstat_list {
	struct shared_kstat_list *sk_next;
	kstat_t			*sk_kstat;
};

static kmutex_t netstack_shared_lock;	/* protects the following two */
static struct shared_zone_list	*netstack_shared_zones;
static struct shared_kstat_list	*netstack_shared_kstats;

static void	*netstack_zone_create(zoneid_t zoneid);
static void	netstack_zone_shutdown(zoneid_t zoneid, void *arg);
static void	netstack_zone_destroy(zoneid_t zoneid, void *arg);

static void	netstack_shared_zone_add(zoneid_t zoneid);
static void	netstack_shared_zone_remove(zoneid_t zoneid);
static void	netstack_shared_kstat_add(kstat_t *ks);
static void	netstack_shared_kstat_remove(kstat_t *ks);

typedef boolean_t applyfn_t(kmutex_t *, netstack_t *, int);
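/*
 * An applyfn_t performs one step of work for a given (netstack, moduleid)
 * pair and returns B_TRUE if it had to drop the passed-in lock while doing
 * that work (a descriptive note; see netstack_apply_create() and friends
 * below).
 */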

static void	apply_all_netstacks(int, applyfn_t *);
static void	apply_all_modules(netstack_t *, applyfn_t *);
static void	apply_all_modules_reverse(netstack_t *, applyfn_t *);
static boolean_t netstack_apply_create(kmutex_t *, netstack_t *, int);
static boolean_t netstack_apply_shutdown(kmutex_t *, netstack_t *, int);
static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
    kmutex_t *);

void
netstack_init(void)
{
	mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);

	netstack_initialized = 1;

	/*
	 * We want to be informed each time a zone is created or
	 * destroyed in the kernel, so we can maintain the
	 * stack instance information.
	 */
	zone_key_create(&netstack_zone_key, netstack_zone_create,
	    netstack_zone_shutdown, netstack_zone_destroy);
}

/*
 * Register a new module with the framework.
 * This registers interest in changes to the set of netstacks.
 * The createfn and destroyfn are required, but the shutdownfn can be
 * NULL.
 * Note that due to the current zsd implementation, when the create
 * function is called the zone isn't fully present; functions like
 * zone_find_by_* will fail, hence the create function cannot use many
 * zone-related kernel functions, including zcmn_err().
 */
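/*
 * Example registration by a hypothetical module "foo" (an illustrative
 * sketch; NS_FOO, foo_stack_t and the foo_* functions are not defined
 * anywhere in this framework):
 *
 *	static void *
 *	foo_stack_create(netstackid_t stackid, netstack_t *ns)
 *	{
 *		foo_stack_t *fs = kmem_zalloc(sizeof (*fs), KM_SLEEP);
 *
 *		fs->fs_stackid = stackid;
 *		return (fs);
 *	}
 *
 *	static void
 *	foo_stack_destroy(netstackid_t stackid, void *arg)
 *	{
 *		kmem_free(arg, sizeof (foo_stack_t));
 *	}
 *
 *	netstack_register(NS_FOO, foo_stack_create, NULL, foo_stack_destroy);
 */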
void
netstack_register(int moduleid,
    void *(*module_create)(netstackid_t, netstack_t *),
    void (*module_shutdown)(netstackid_t, void *),
    void (*module_destroy)(netstackid_t, void *))
{
	netstack_t *ns;

	ASSERT(netstack_initialized);
	ASSERT(moduleid >= 0 && moduleid < NS_MAX);
	ASSERT(module_create != NULL);

	/*
	 * Make instances created after this point in time run the create
	 * callback.
	 */
	mutex_enter(&netstack_g_lock);
	ASSERT(ns_reg[moduleid].nr_create == NULL);
	ASSERT(ns_reg[moduleid].nr_flags == 0);
	ns_reg[moduleid].nr_create = module_create;
	ns_reg[moduleid].nr_shutdown = module_shutdown;
	ns_reg[moduleid].nr_destroy = module_destroy;
	ns_reg[moduleid].nr_flags = NRF_REGISTERED;

	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_CREATE_NEEDED for each of those.
	 * Netstacks which have been deleted will have NSS_CREATE_COMPLETED
	 * set, but check NSF_CLOSING to be sure.
	 */
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);
		if (!(ns->netstack_flags & NSF_CLOSING) &&
		    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
			nms->nms_flags |= NSS_CREATE_NEEDED;
			DTRACE_PROBE2(netstack__create__needed,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}
	mutex_exit(&netstack_g_lock);

	/*
	 * At this point in time a new instance can be created or an instance
	 * can be destroyed, or some other module can register or unregister.
	 * Make sure we either run all the create functions for this moduleid
	 * or we wait for any other creators for this moduleid.
	 */
	apply_all_netstacks(moduleid, netstack_apply_create);
}

void
netstack_unregister(int moduleid)
{
	netstack_t *ns;

	ASSERT(moduleid >= 0 && moduleid < NS_MAX);

	ASSERT(ns_reg[moduleid].nr_create != NULL);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);

	mutex_enter(&netstack_g_lock);
	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_SHUTDOWN_NEEDED and NSS_DESTROY_NEEDED for each of those.
	 * That ensures that when we return, all the callbacks for existing
	 * instances have completed. And since we set NRF_DYING, no new
	 * instances can use this module.
	 */
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);
		if (ns_reg[moduleid].nr_shutdown != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, moduleid);
		}
		if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) &&
		    ns_reg[moduleid].nr_destroy != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
			nms->nms_flags |= NSS_DESTROY_NEEDED;
			DTRACE_PROBE2(netstack__destroy__needed,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}
	/*
	 * Prevent any new netstack from calling the registered create
	 * function, while keeping the function pointers in place until the
	 * shutdown and destroy callbacks are complete.
	 */
	ns_reg[moduleid].nr_flags |= NRF_DYING;
	mutex_exit(&netstack_g_lock);

	apply_all_netstacks(moduleid, netstack_apply_shutdown);
	apply_all_netstacks(moduleid, netstack_apply_destroy);

	/*
	 * Clear the nms_flags so that we can handle this module
	 * being loaded again.
	 * Also remove the registered functions.
	 */
	mutex_enter(&netstack_g_lock);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_DYING);
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);
		if (nms->nms_flags & NSS_DESTROY_COMPLETED) {
			nms->nms_flags = 0;
			DTRACE_PROBE2(netstack__destroy__done,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}

	ns_reg[moduleid].nr_create = NULL;
	ns_reg[moduleid].nr_shutdown = NULL;
	ns_reg[moduleid].nr_destroy = NULL;
	ns_reg[moduleid].nr_flags = 0;
	mutex_exit(&netstack_g_lock);
}

/*
 * Lookup and/or allocate a netstack for this zone.
 */
static void *
netstack_zone_create(zoneid_t zoneid)
{
	netstackid_t stackid;
	netstack_t *ns;
	netstack_t **nsp;
	zone_t	*zone;
	int i;

	ASSERT(netstack_initialized);

	zone = zone_find_by_id_nolock(zoneid);
	ASSERT(zone != NULL);

	if (zone->zone_flags & ZF_NET_EXCL) {
		stackid = zoneid;
	} else {
		/* Look for the stack instance for the global zone */
		stackid = GLOBAL_NETSTACKID;
	}

	/* Allocate even if it isn't needed; simplifies locking */
	ns = (netstack_t *)kmem_zalloc(sizeof (netstack_t), KM_SLEEP);

	/* See if there is a matching stack instance */
	mutex_enter(&netstack_g_lock);
	for (nsp = &netstack_head; *nsp != NULL;
	    nsp = &((*nsp)->netstack_next)) {
		if ((*nsp)->netstack_stackid == stackid) {
			/*
			 * Should never find a pre-existing exclusive stack
			 */
			ASSERT(stackid == GLOBAL_NETSTACKID);
			kmem_free(ns, sizeof (netstack_t));
			ns = *nsp;
			mutex_enter(&ns->netstack_lock);
			ns->netstack_numzones++;
			mutex_exit(&ns->netstack_lock);
			mutex_exit(&netstack_g_lock);
			DTRACE_PROBE1(netstack__inc__numzones,
			    netstack_t *, ns);
			/* Record that we have a new shared stack zone */
			netstack_shared_zone_add(zoneid);
			zone->zone_netstack = ns;
			return (ns);
		}
	}
	/* Not found */
	mutex_init(&ns->netstack_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&ns->netstack_cv, NULL, CV_DEFAULT, NULL);
	ns->netstack_stackid = zoneid;
	ns->netstack_numzones = 1;
	ns->netstack_refcnt = 1; /* Decremented by netstack_zone_destroy */
	ns->netstack_flags = NSF_UNINIT;
	*nsp = ns;
	zone->zone_netstack = ns;

	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a CREATE running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing create callbacks to complete in moduleid order.
	 */
	ns->netstack_flags |= NSF_ZONE_CREATE;

	/*
	 * Determine the set of module create functions that need to be
	 * called before we drop the lock.
	 * Set NSS_CREATE_NEEDED for each of those.
	 * Skip any with NRF_DYING set, since those are in the process of
	 * going away, by checking for flags being exactly NRF_REGISTERED.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		cv_init(&nms->nms_cv, NULL, CV_DEFAULT, NULL);

		if ((ns_reg[i].nr_flags == NRF_REGISTERED) &&
		    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
			nms->nms_flags |= NSS_CREATE_NEEDED;
			DTRACE_PROBE2(netstack__create__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	apply_all_modules(ns, netstack_apply_create);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ns->netstack_flags &= ~NSF_UNINIT;
	ASSERT(ns->netstack_flags & NSF_ZONE_CREATE);
	ns->netstack_flags &= ~NSF_ZONE_CREATE;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);

	return (ns);
}

/* ARGSUSED */
static void
netstack_zone_shutdown(zoneid_t zoneid, void *arg)
{
	netstack_t *ns = (netstack_t *)arg;
	int i;

	ASSERT(arg != NULL);

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_numzones > 0);
	if (ns->netstack_numzones != 1) {
		/* Stack instance being used by other zone */
		mutex_exit(&ns->netstack_lock);
		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
		return;
	}
	mutex_exit(&ns->netstack_lock);

	mutex_enter(&netstack_g_lock);
	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a SHUTDOWN running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing shutdown callbacks to complete in moduleid order.
	 */
	ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
	ns->netstack_flags |= NSF_ZONE_SHUTDOWN;

	/*
	 * Determine the set of module shutdown functions that need to be
	 * called before we drop the lock.
	 * Set NSS_SHUTDOWN_NEEDED for each of those.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_shutdown != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	/*
	 * Call the shutdown function for all registered modules for this
	 * netstack.
	 */
	apply_all_modules_reverse(ns, netstack_apply_shutdown);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_flags & NSF_ZONE_SHUTDOWN);
	ns->netstack_flags &= ~NSF_ZONE_SHUTDOWN;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);
}

/*
 * Common routine to release a zone.
 * If this was the last zone using the stack instance then the drop of
 * the refcnt to zero will free the stack instance.
 */
/* ARGSUSED */
static void
netstack_zone_destroy(zoneid_t zoneid, void *arg)
{
	netstack_t *ns = (netstack_t *)arg;

	ASSERT(arg != NULL);

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_numzones > 0);
	ns->netstack_numzones--;
	if (ns->netstack_numzones != 0) {
		/* Stack instance being used by other zone */
		mutex_exit(&ns->netstack_lock);
		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
		/* Record that a shared stack zone has gone away */
		netstack_shared_zone_remove(zoneid);
		return;
	}
	/*
	 * Set CLOSING so that netstack_find_by will not find it.
	 */
	ns->netstack_flags |= NSF_CLOSING;
	mutex_exit(&ns->netstack_lock);
	DTRACE_PROBE1(netstack__dec__numzones, netstack_t *, ns);
	/* No other thread can call zone_destroy for this stack */

	/*
	 * Decrease refcnt to account for the one taken in
	 * netstack_zone_create().
	 */
	netstack_rele(ns);
}

/*
 * Called when the reference count drops to zero.
 * Call the destroy functions for each registered module.
 */
static void
netstack_stack_inactive(netstack_t *ns)
{
	int i;

	mutex_enter(&netstack_g_lock);
	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a DESTROY running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing destroy callbacks to complete in reverse moduleid
	 * order.
	 */
	ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
	ns->netstack_flags |= NSF_ZONE_DESTROY;
	/*
	 * If the shutdown callback wasn't called earlier (e.g., if this is
	 * a netstack shared between multiple zones), then we schedule it now.
	 *
	 * Determine the set of module shutdown and destroy functions that
	 * need to be called before we drop the lock. Set NSS_DESTROY_NEEDED
	 * for each of those. That ensures that when we return, all the
	 * callbacks for existing instances have completed.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_shutdown != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, i);
		}

		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_destroy != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
			nms->nms_flags |= NSS_DESTROY_NEEDED;
			DTRACE_PROBE2(netstack__destroy__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	/*
	 * Call the shutdown and destroy functions for all registered modules
	 * for this netstack.
	 *
	 * Since there are some ordering dependencies between the modules we
	 * tear them down in the reverse of the order in which they were
	 * created.
	 *
	 * Since a netstack_t is never reused (when a zone is rebooted it gets
	 * a new zoneid == netstackid, i.e., a new netstack_t is allocated) we
	 * leave nms_flags the way it is, i.e., with NSS_DESTROY_COMPLETED set.
	 * That is different than in the netstack_unregister() case.
	 */
	apply_all_modules_reverse(ns, netstack_apply_shutdown);
	apply_all_modules_reverse(ns, netstack_apply_destroy);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_flags & NSF_ZONE_DESTROY);
	ns->netstack_flags &= ~NSF_ZONE_DESTROY;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);
}

/*
 * Apply a function to all netstacks for a particular moduleid.
 *
 * If there is any zone activity (due to a zone being created, shutdown,
 * or destroyed) we wait for that to complete before we proceed. This ensures
 * that the moduleids are processed in order when a zone is created or
 * destroyed.
 *
 * The applyfn has to drop netstack_g_lock if it does some work.
 * In that case we don't follow netstack_next,
 * even if it is possible to do so without any hazards. This is
 * because we want the design to allow for the list of netstacks threaded
 * by netstack_next to change in any arbitrary way during the time the
 * lock was dropped.
 * It is safe to restart the loop at netstack_head since the applyfn
 * changes netstack_m_state as it processes things, so a subsequent
 * pass through will find no work left to do; hence the loop will
 * terminate after at worst O(N^2) iterations.
 */
static void
apply_all_netstacks(int moduleid, applyfn_t *applyfn)
{
	netstack_t *ns;

	mutex_enter(&netstack_g_lock);
	ns = netstack_head;
	while (ns != NULL) {
		if (wait_for_zone_creator(ns, &netstack_g_lock)) {
			/* Lock dropped - restart at head */
			ns = netstack_head;
		} else if ((applyfn)(&netstack_g_lock, ns, moduleid)) {
			/* Lock dropped - restart at head */
			ns = netstack_head;
		} else {
			ns = ns->netstack_next;
		}
	}
	mutex_exit(&netstack_g_lock);
}

/*
 * Apply a function to all moduleids for a particular netstack.
 *
 * Since the netstack linkage doesn't matter in this case we can
 * ignore whether the function drops the lock.
 */
static void
apply_all_modules(netstack_t *ns, applyfn_t *applyfn)
{
	int i;

	mutex_enter(&netstack_g_lock);
	for (i = 0; i < NS_MAX; i++) {
		/*
		 * We don't care whether the lock was dropped
		 * since we are not iterating over netstack_head.
		 */
		(void) (applyfn)(&netstack_g_lock, ns, i);
	}
	mutex_exit(&netstack_g_lock);
}

/* Like the above but in reverse moduleid order */
static void
apply_all_modules_reverse(netstack_t *ns, applyfn_t *applyfn)
{
	int i;

	mutex_enter(&netstack_g_lock);
	for (i = NS_MAX - 1; i >= 0; i--) {
		/*
		 * We don't care whether the lock was dropped
		 * since we are not iterating over netstack_head.
		 */
		(void) (applyfn)(&netstack_g_lock, ns, i);
	}
	mutex_exit(&netstack_g_lock);
}

/*
 * Call the create function for the ns and moduleid if CREATE_NEEDED
 * is set.
 * If some other thread gets here first and sets *_INPROGRESS, then
 * we wait for that thread to complete so that we can ensure that
 * all the callbacks are done when we've looped over all netstacks/moduleids.
 *
 * When we call the create function, we temporarily drop the netstack_lock
 * held by the caller, and return true to tell the caller it needs to
 * re-evaluate the state.
 */
static boolean_t
netstack_apply_create(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	void *result;
	netstackid_t stackid;
	nm_state_t *nms = &ns->netstack_m_state[moduleid];
	boolean_t dropped = B_FALSE;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);

	if (wait_for_nms_inprogress(ns, nms, lockp))
		dropped = B_TRUE;

	if (nms->nms_flags & NSS_CREATE_NEEDED) {
		nms->nms_flags &= ~NSS_CREATE_NEEDED;
		nms->nms_flags |= NSS_CREATE_INPROGRESS;
		DTRACE_PROBE2(netstack__create__inprogress,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		mutex_exit(lockp);
		dropped = B_TRUE;

		ASSERT(ns_reg[moduleid].nr_create != NULL);
		stackid = ns->netstack_stackid;
		DTRACE_PROBE2(netstack__create__start,
		    netstackid_t, stackid,
		    netstack_t *, ns);
		(void) printf("ns[%d](%d).create\n", moduleid, stackid);
		result = (ns_reg[moduleid].nr_create)(stackid, ns);
		DTRACE_PROBE2(netstack__create__end,
		    void *, result, netstack_t *, ns);

		ASSERT(result != NULL);
		mutex_enter(lockp);
		mutex_enter(&ns->netstack_lock);
		ns->netstack_modules[moduleid] = result;
		nms->nms_flags &= ~NSS_CREATE_INPROGRESS;
		nms->nms_flags |= NSS_CREATE_COMPLETED;
		cv_broadcast(&nms->nms_cv);
		DTRACE_PROBE2(netstack__create__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	} else {
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	}
}

/*
 * Call the shutdown function for the ns and moduleid if SHUTDOWN_NEEDED
 * is set.
 * If some other thread gets here first and sets *_INPROGRESS, then
 * we wait for that thread to complete so that we can ensure that
 * all the callbacks are done when we've looped over all netstacks/moduleids.
 *
 * When we call the shutdown function, we temporarily drop the netstack_lock
 * held by the caller, and return true to tell the caller it needs to
 * re-evaluate the state.
 */
static boolean_t
netstack_apply_shutdown(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	netstackid_t stackid;
	void *netstack_module;
	nm_state_t *nms = &ns->netstack_m_state[moduleid];
	boolean_t dropped = B_FALSE;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);

	if (wait_for_nms_inprogress(ns, nms, lockp))
		dropped = B_TRUE;

	if (nms->nms_flags & NSS_SHUTDOWN_NEEDED) {
		nms->nms_flags &= ~NSS_SHUTDOWN_NEEDED;
		nms->nms_flags |= NSS_SHUTDOWN_INPROGRESS;
		DTRACE_PROBE2(netstack__shutdown__inprogress,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		mutex_exit(lockp);
		dropped = B_TRUE;

		ASSERT(ns_reg[moduleid].nr_shutdown != NULL);
		stackid = ns->netstack_stackid;
		netstack_module = ns->netstack_modules[moduleid];
		DTRACE_PROBE2(netstack__shutdown__start,
		    netstackid_t, stackid,
		    void *, netstack_module);
		(void) printf("ns[%d](%d).shutdown\n", moduleid, stackid);
		(ns_reg[moduleid].nr_shutdown)(stackid, netstack_module);
		DTRACE_PROBE1(netstack__shutdown__end,
		    netstack_t *, ns);

		mutex_enter(lockp);
		mutex_enter(&ns->netstack_lock);
		nms->nms_flags &= ~NSS_SHUTDOWN_INPROGRESS;
		nms->nms_flags |= NSS_SHUTDOWN_COMPLETED;
		cv_broadcast(&nms->nms_cv);
		DTRACE_PROBE2(netstack__shutdown__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	} else {
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	}
}

/*
 * Call the destroy function for the ns and moduleid if DESTROY_NEEDED
 * is set.
 * If some other thread gets here first and sets *_INPROGRESS, then
 * we wait for that thread to complete so that we can ensure that
 * all the callbacks are done when we've looped over all netstacks/moduleids.
 *
 * When we call the destroy function, we temporarily drop the netstack_lock
 * held by the caller, and return true to tell the caller it needs to
 * re-evaluate the state.
 */
static boolean_t
netstack_apply_destroy(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	netstackid_t stackid;
	void *netstack_module;
	nm_state_t *nms = &ns->netstack_m_state[moduleid];
	boolean_t dropped = B_FALSE;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);

	if (wait_for_nms_inprogress(ns, nms, lockp))
		dropped = B_TRUE;

	if (nms->nms_flags & NSS_DESTROY_NEEDED) {
		nms->nms_flags &= ~NSS_DESTROY_NEEDED;
		nms->nms_flags |= NSS_DESTROY_INPROGRESS;
		DTRACE_PROBE2(netstack__destroy__inprogress,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		mutex_exit(lockp);
		dropped = B_TRUE;

		ASSERT(ns_reg[moduleid].nr_destroy != NULL);
		stackid = ns->netstack_stackid;
		netstack_module = ns->netstack_modules[moduleid];
		DTRACE_PROBE2(netstack__destroy__start,
		    netstackid_t, stackid,
		    void *, netstack_module);
		(void) printf("ns[%d](%d).destroy\n", moduleid, stackid);
		(ns_reg[moduleid].nr_destroy)(stackid, netstack_module);
		DTRACE_PROBE1(netstack__destroy__end,
		    netstack_t *, ns);

		mutex_enter(lockp);
		mutex_enter(&ns->netstack_lock);
		ns->netstack_modules[moduleid] = NULL;
		nms->nms_flags &= ~NSS_DESTROY_INPROGRESS;
		nms->nms_flags |= NSS_DESTROY_COMPLETED;
		cv_broadcast(&nms->nms_cv);
		DTRACE_PROBE2(netstack__destroy__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	} else {
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	}
}

/*
 * If somebody is creating the netstack (due to a new zone being created)
 * then we wait for them to complete. This ensures that any additional
 * netstack_register() doesn't cause the create functions to run out of
 * order.
 * Note that we do not need such a global wait in the case of the shutdown
 * and destroy callbacks, since in that case it is sufficient for both
 * threads to set NEEDED and wait for INPROGRESS to ensure ordering.
 * Returns true if lockp was temporarily dropped while waiting.
 */
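/*
 * Lock ordering note (descriptive, not new behavior): netstack_g_lock is
 * always acquired before a netstack_lock, which is why both wait functions
 * below drop the netstack_lock first and re-acquire the caller's lockp
 * before re-taking the netstack_lock.
 */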
static boolean_t
wait_for_zone_creator(netstack_t *ns, kmutex_t *lockp)
{
	boolean_t dropped = B_FALSE;

	mutex_enter(&ns->netstack_lock);
	while (ns->netstack_flags & NSF_ZONE_CREATE) {
		DTRACE_PROBE1(netstack__wait__zone__inprogress,
		    netstack_t *, ns);
		if (lockp != NULL) {
			dropped = B_TRUE;
			mutex_exit(lockp);
		}
		cv_wait(&ns->netstack_cv, &ns->netstack_lock);
		if (lockp != NULL) {
			/* First drop netstack_lock to preserve order */
			mutex_exit(&ns->netstack_lock);
			mutex_enter(lockp);
			mutex_enter(&ns->netstack_lock);
		}
	}
	mutex_exit(&ns->netstack_lock);
	return (dropped);
}

/*
 * Wait for any INPROGRESS flag to be cleared for the netstack/moduleid
 * combination.
 * Returns true if lockp was temporarily dropped while waiting.
 */
static boolean_t
wait_for_nms_inprogress(netstack_t *ns, nm_state_t *nms, kmutex_t *lockp)
{
	boolean_t dropped = B_FALSE;

	while (nms->nms_flags & NSS_ALL_INPROGRESS) {
		DTRACE_PROBE2(netstack__wait__nms__inprogress,
		    netstack_t *, ns, nm_state_t *, nms);
		if (lockp != NULL) {
			dropped = B_TRUE;
			mutex_exit(lockp);
		}
		cv_wait(&nms->nms_cv, &ns->netstack_lock);
		if (lockp != NULL) {
			/* First drop netstack_lock to preserve order */
			mutex_exit(&ns->netstack_lock);
			mutex_enter(lockp);
			mutex_enter(&ns->netstack_lock);
		}
	}
	return (dropped);
}

/*
 * Get the stack instance used in caller's zone.
 * Increases the reference count; the caller must do a netstack_rele().
 * It can't be called after zone_destroy() has started.
 */
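/*
 * Typical caller pattern (an illustrative sketch; NS_FOO and foo_stack_t
 * are hypothetical, not defined by this framework):
 *
 *	netstack_t *ns = netstack_get_current();
 *
 *	if (ns != NULL) {
 *		foo_stack_t *fs = ns->netstack_modules[NS_FOO];
 *		... use fs ...
 *		netstack_rele(ns);
 *	}
 */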
netstack_t *
netstack_get_current(void)
{
	netstack_t *ns;

	ns = curproc->p_zone->zone_netstack;
	ASSERT(ns != NULL);
	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
		return (NULL);

	netstack_hold(ns);

	return (ns);
}

/*
 * Find a stack instance given the cred.
 * This is used by the modules to potentially allow for a future when
 * something other than the zoneid is used to determine the stack.
 */
netstack_t *
netstack_find_by_cred(const cred_t *cr)
{
	zoneid_t zoneid = crgetzoneid(cr);

	/* Handle the case when cr_zone is NULL */
	if (zoneid == (zoneid_t)-1)
		zoneid = GLOBAL_ZONEID;

	/* For performance ... */
	if (curproc->p_zone->zone_id == zoneid)
		return (netstack_get_current());
	else
		return (netstack_find_by_zoneid(zoneid));
}

/*
 * Find a stack instance given the zoneid.
 * Increases the reference count if found; caller must do a
 * netstack_rele().
 *
 * If there is no exact match then assume the shared stack instance
 * matches.
 *
 * Skip the uninitialized ones.
 */
netstack_t *
netstack_find_by_zoneid(zoneid_t zoneid)
{
	netstack_t *ns;
	zone_t *zone;

	zone = zone_find_by_id(zoneid);

	if (zone == NULL)
		return (NULL);

	ns = zone->zone_netstack;
	ASSERT(ns != NULL);
	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
		ns = NULL;
	else
		netstack_hold(ns);

	zone_rele(zone);
	return (ns);
}

/*
 * Find a stack instance given the zoneid. Can only be called from
 * the create callback. See the comments in zone_find_by_id_nolock for
 * why that limitation exists.
 *
 * Increases the reference count if found; caller must do a
 * netstack_rele().
 *
 * If there is no exact match then assume the shared stack instance
 * matches.
 *
 * Skip the uninitialized ones.
 */
netstack_t *
netstack_find_by_zoneid_nolock(zoneid_t zoneid)
{
	netstack_t *ns;
	zone_t *zone;

	zone = zone_find_by_id_nolock(zoneid);

	if (zone == NULL)
		return (NULL);

	ns = zone->zone_netstack;
	ASSERT(ns != NULL);

	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
		ns = NULL;
	else
		netstack_hold(ns);

	/* zone_find_by_id_nolock does not have a hold on the zone */
	return (ns);
}

/*
 * Find a stack instance given the stackid, requiring an exact match.
 * Increases the reference count if found; caller must do a
 * netstack_rele().
 *
 * Skip the uninitialized ones.
 */
netstack_t *
netstack_find_by_stackid(netstackid_t stackid)
{
	netstack_t *ns;

	mutex_enter(&netstack_g_lock);
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		mutex_enter(&ns->netstack_lock);
		if (ns->netstack_stackid == stackid &&
		    !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
			mutex_exit(&ns->netstack_lock);
			netstack_hold(ns);
			mutex_exit(&netstack_g_lock);
			return (ns);
		}
		mutex_exit(&ns->netstack_lock);
	}
	mutex_exit(&netstack_g_lock);
	return (NULL);
}

void
netstack_rele(netstack_t *ns)
{
	netstack_t **nsp;
	boolean_t found;
	int refcnt, numzones;
	int i;

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_refcnt > 0);
	ns->netstack_refcnt--;
	/*
	 * As we drop the lock additional netstack_rele()s can come in
	 * and decrement the refcnt to zero and free the netstack_t.
	 * Copy the values we need into local variables, and if we were
	 * not the last reference, don't touch the netstack_t after that.
	 */
	refcnt = ns->netstack_refcnt;
	numzones = ns->netstack_numzones;
	DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
	mutex_exit(&ns->netstack_lock);

	if (refcnt == 0 && numzones == 0) {
		/*
		 * Time to call the destroy functions and free up
		 * the structure
		 */
		netstack_stack_inactive(ns);

		/* Make sure nothing increased the references */
		ASSERT(ns->netstack_refcnt == 0);
		ASSERT(ns->netstack_numzones == 0);

		/* Finally remove from list of netstacks */
		mutex_enter(&netstack_g_lock);
		found = B_FALSE;
		for (nsp = &netstack_head; *nsp != NULL;
		    nsp = &(*nsp)->netstack_next) {
			if (*nsp == ns) {
				*nsp = ns->netstack_next;
				ns->netstack_next = NULL;
				found = B_TRUE;
				break;
			}
		}
		ASSERT(found);
		mutex_exit(&netstack_g_lock);

		/* Make sure nothing increased the references */
		ASSERT(ns->netstack_refcnt == 0);
		ASSERT(ns->netstack_numzones == 0);

		ASSERT(ns->netstack_flags & NSF_CLOSING);

		for (i = 0; i < NS_MAX; i++) {
			nm_state_t *nms = &ns->netstack_m_state[i];

			cv_destroy(&nms->nms_cv);
		}
		mutex_destroy(&ns->netstack_lock);
		cv_destroy(&ns->netstack_cv);
		kmem_free(ns, sizeof (*ns));
	}
}

void
netstack_hold(netstack_t *ns)
{
	mutex_enter(&ns->netstack_lock);
	ns->netstack_refcnt++;
	ASSERT(ns->netstack_refcnt > 0);
	mutex_exit(&ns->netstack_lock);
	DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
}

/*
 * To support kstat_create_netstack() using kstat_zone_add we need
 * to track both
 *  - all zoneids that use the global/shared stack
 *  - all kstats that have been added for the shared stack
 */
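/*
 * Example (an illustrative sketch; the "foo" names and the nstats count
 * are hypothetical):
 *
 *	kstat_t *ks = kstat_create_netstack("foo", 0, "foostat", "net",
 *	    KSTAT_TYPE_NAMED, nstats, 0, stackid);
 *	...
 *	kstat_delete_netstack(ks, stackid);
 */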
kstat_t *
kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
    char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
    netstackid_t ks_netstackid)
{
	kstat_t *ks;

	if (ks_netstackid == GLOBAL_NETSTACKID) {
		ks = kstat_create_zone(ks_module, ks_instance, ks_name,
		    ks_class, ks_type, ks_ndata, ks_flags, GLOBAL_ZONEID);
		if (ks != NULL)
			netstack_shared_kstat_add(ks);
		return (ks);
	} else {
		zoneid_t zoneid = ks_netstackid;

		return (kstat_create_zone(ks_module, ks_instance, ks_name,
		    ks_class, ks_type, ks_ndata, ks_flags, zoneid));
	}
}

void
kstat_delete_netstack(kstat_t *ks, netstackid_t ks_netstackid)
{
	if (ks_netstackid == GLOBAL_NETSTACKID) {
		netstack_shared_kstat_remove(ks);
	}
	kstat_delete(ks);
}

static void
netstack_shared_zone_add(zoneid_t zoneid)
{
	struct shared_zone_list *sz;
	struct shared_kstat_list *sk;

	sz = (struct shared_zone_list *)kmem_zalloc(sizeof (*sz), KM_SLEEP);
	sz->sz_zoneid = zoneid;

	/* Insert in list */
	mutex_enter(&netstack_shared_lock);
	sz->sz_next = netstack_shared_zones;
	netstack_shared_zones = sz;

	/*
	 * Perform kstat_zone_add for each existing shared stack kstat.
	 * Note: Holds netstack_shared_lock across kstat_zone_add.
	 */
	for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
		kstat_zone_add(sk->sk_kstat, zoneid);
	}
	mutex_exit(&netstack_shared_lock);
}

static void
netstack_shared_zone_remove(zoneid_t zoneid)
{
	struct shared_zone_list **szp, *sz;
	struct shared_kstat_list *sk;

	/* Find in list */
	mutex_enter(&netstack_shared_lock);
	sz = NULL;
	for (szp = &netstack_shared_zones; *szp != NULL;
	    szp = &((*szp)->sz_next)) {
		if ((*szp)->sz_zoneid == zoneid) {
			sz = *szp;
			break;
		}
	}
	/* We must find it */
	ASSERT(sz != NULL);
	*szp = sz->sz_next;
	sz->sz_next = NULL;

	/*
	 * Perform kstat_zone_remove for each existing shared stack kstat.
	 * Note: Holds netstack_shared_lock across kstat_zone_remove.
	 */
	for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
		kstat_zone_remove(sk->sk_kstat, zoneid);
	}
	mutex_exit(&netstack_shared_lock);

	kmem_free(sz, sizeof (*sz));
}

static void
netstack_shared_kstat_add(kstat_t *ks)
{
	struct shared_zone_list *sz;
	struct shared_kstat_list *sk;

	sk = (struct shared_kstat_list *)kmem_zalloc(sizeof (*sk), KM_SLEEP);
	sk->sk_kstat = ks;

	/* Insert in list */
	mutex_enter(&netstack_shared_lock);
	sk->sk_next = netstack_shared_kstats;
	netstack_shared_kstats = sk;

	/*
	 * Perform kstat_zone_add for each existing shared stack zone.
	 * Note: Holds netstack_shared_lock across kstat_zone_add.
	 */
	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
		kstat_zone_add(ks, sz->sz_zoneid);
	}
	mutex_exit(&netstack_shared_lock);
}

static void
netstack_shared_kstat_remove(kstat_t *ks)
{
	struct shared_zone_list *sz;
	struct shared_kstat_list **skp, *sk;

	/* Find in list */
	mutex_enter(&netstack_shared_lock);
	sk = NULL;
	for (skp = &netstack_shared_kstats; *skp != NULL;
	    skp = &((*skp)->sk_next)) {
		if ((*skp)->sk_kstat == ks) {
			sk = *skp;
			break;
		}
	}
	/* Must find it */
	ASSERT(sk != NULL);
	*skp = sk->sk_next;
	sk->sk_next = NULL;

	/*
	 * Perform kstat_zone_remove for each existing shared stack zone.
	 * Note: Holds netstack_shared_lock across kstat_zone_remove.
	 */
	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
		kstat_zone_remove(ks, sz->sz_zoneid);
	}
	mutex_exit(&netstack_shared_lock);
	kmem_free(sk, sizeof (*sk));
}

/*
 * Return true if the zoneid is part of the shared (global) stack.
 */
static boolean_t
netstack_find_shared_zoneid(zoneid_t zoneid)
{
	struct shared_zone_list *sz;

	mutex_enter(&netstack_shared_lock);
	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
		if (sz->sz_zoneid == zoneid) {
			mutex_exit(&netstack_shared_lock);
			return (B_TRUE);
		}
	}
	mutex_exit(&netstack_shared_lock);
	return (B_FALSE);
}

/*
 * Hide the fact that zoneids and netstackids are allocated from
 * the same space in the current implementation.
 * We currently do not check that the stackid/zoneids are valid, since there
 * is no need for that. But this should only be done for ids that are
 * valid.
 */
zoneid_t
netstackid_to_zoneid(netstackid_t stackid)
{
	return (stackid);
}

netstackid_t
zoneid_to_netstackid(zoneid_t zoneid)
{
	if (netstack_find_shared_zoneid(zoneid))
		return (GLOBAL_NETSTACKID);
	else
		return (zoneid);
}

/*
 * Simplistic support for walking all the handles.
 * Example usage:
 *	netstack_handle_t nh;
 *	netstack_t *ns;
 *
 *	netstack_next_init(&nh);
 *	while ((ns = netstack_next(&nh)) != NULL) {
 *		do something;
 *		netstack_rele(ns);
 *	}
 *	netstack_next_fini(&nh);
 */
void
netstack_next_init(netstack_handle_t *handle)
{
	*handle = 0;
}

/* ARGSUSED */
void
netstack_next_fini(netstack_handle_t *handle)
{
}

netstack_t *
netstack_next(netstack_handle_t *handle)
{
	netstack_t *ns;
	int i, end;

	end = *handle;
	/* Walk the list, skipping the first *handle instances */

	/* See if there is a matching stack instance */
	mutex_enter(&netstack_g_lock);
	ns = netstack_head;
	for (i = 0; i < end; i++) {
		if (ns == NULL)
			break;
		ns = ns->netstack_next;
	}
	/* Skip those that aren't really here */
	while (ns != NULL) {
		mutex_enter(&ns->netstack_lock);
		if ((ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) == 0) {
			mutex_exit(&ns->netstack_lock);
			break;
		}
		mutex_exit(&ns->netstack_lock);
		end++;
		ns = ns->netstack_next;
	}
	if (ns != NULL) {
		*handle = end + 1;
		netstack_hold(ns);
	}
	mutex_exit(&netstack_g_lock);
	return (ns);
}