xref: /titanic_51/usr/src/uts/common/os/netstack.c (revision 589efa9501f3347f21e60905a96ca39427169e10)
1f4b3ec61Sdh155122 /*
2f4b3ec61Sdh155122  * CDDL HEADER START
3f4b3ec61Sdh155122  *
4f4b3ec61Sdh155122  * The contents of this file are subject to the terms of the
5f4b3ec61Sdh155122  * Common Development and Distribution License (the "License").
6f4b3ec61Sdh155122  * You may not use this file except in compliance with the License.
7f4b3ec61Sdh155122  *
8f4b3ec61Sdh155122  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9f4b3ec61Sdh155122  * or http://www.opensolaris.org/os/licensing.
10f4b3ec61Sdh155122  * See the License for the specific language governing permissions
11f4b3ec61Sdh155122  * and limitations under the License.
12f4b3ec61Sdh155122  *
13f4b3ec61Sdh155122  * When distributing Covered Code, include this CDDL HEADER in each
14f4b3ec61Sdh155122  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15f4b3ec61Sdh155122  * If applicable, add the following below this CDDL HEADER, with the
16f4b3ec61Sdh155122  * fields enclosed by brackets "[]" replaced with your own identifying
17f4b3ec61Sdh155122  * information: Portions Copyright [yyyy] [name of copyright owner]
18f4b3ec61Sdh155122  *
19f4b3ec61Sdh155122  * CDDL HEADER END
20f4b3ec61Sdh155122  */
21f4b3ec61Sdh155122 
22f4b3ec61Sdh155122 /*
230a0e9771SDarren Reed  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24f4b3ec61Sdh155122  * Use is subject to license terms.
25*589efa95SRobert Mustacchi  * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
26f4b3ec61Sdh155122  */
27f4b3ec61Sdh155122 
28f4b3ec61Sdh155122 #include <sys/param.h>
29f4b3ec61Sdh155122 #include <sys/sysmacros.h>
30f4b3ec61Sdh155122 #include <sys/vm.h>
31f4b3ec61Sdh155122 #include <sys/proc.h>
32f4b3ec61Sdh155122 #include <sys/tuneable.h>
33f4b3ec61Sdh155122 #include <sys/systm.h>
34f4b3ec61Sdh155122 #include <sys/cmn_err.h>
35f4b3ec61Sdh155122 #include <sys/debug.h>
36f4b3ec61Sdh155122 #include <sys/sdt.h>
37f4b3ec61Sdh155122 #include <sys/mutex.h>
38f4b3ec61Sdh155122 #include <sys/bitmap.h>
39f4b3ec61Sdh155122 #include <sys/atomic.h>
40f4b3ec61Sdh155122 #include <sys/kobj.h>
41f4b3ec61Sdh155122 #include <sys/disp.h>
42f4b3ec61Sdh155122 #include <vm/seg_kmem.h>
43f4b3ec61Sdh155122 #include <sys/zone.h>
44f4b3ec61Sdh155122 #include <sys/netstack.h>
45f4b3ec61Sdh155122 
46f4b3ec61Sdh155122 /*
47f4b3ec61Sdh155122  * What we use so that the zones framework can tell us about new zones,
48f4b3ec61Sdh155122  * which we use to create new stacks.
49f4b3ec61Sdh155122  */
50f4b3ec61Sdh155122 static zone_key_t netstack_zone_key;
51f4b3ec61Sdh155122 
52f4b3ec61Sdh155122 static int	netstack_initialized = 0;
53f4b3ec61Sdh155122 
54f4b3ec61Sdh155122 /*
55f4b3ec61Sdh155122  * Track the registered netstacks.
56f4b3ec61Sdh155122  * The global lock protects
57f4b3ec61Sdh155122  * - ns_reg
58f4b3ec61Sdh155122  * - the list starting at netstack_head and following the netstack_next
59f4b3ec61Sdh155122  *   pointers.
60f4b3ec61Sdh155122  */
61f4b3ec61Sdh155122 static kmutex_t netstack_g_lock;
62f4b3ec61Sdh155122 
63f4b3ec61Sdh155122 /*
64f4b3ec61Sdh155122  * Registry of netstacks with their create/shutdown/destory functions.
65f4b3ec61Sdh155122  */
66f4b3ec61Sdh155122 static struct netstack_registry	ns_reg[NS_MAX];
67f4b3ec61Sdh155122 
68f4b3ec61Sdh155122 /*
69f4b3ec61Sdh155122  * Global list of existing stacks.  We use this when a new zone with
70f4b3ec61Sdh155122  * an exclusive IP instance is created.
71f4b3ec61Sdh155122  *
72f4b3ec61Sdh155122  * Note that in some cases a netstack_t needs to stay around after the zone
73f4b3ec61Sdh155122  * has gone away. This is because there might be outstanding references
74f4b3ec61Sdh155122  * (from TCP TIME_WAIT connections, IPsec state, etc). The netstack_t data
75f4b3ec61Sdh155122  * structure and all the foo_stack_t's hanging off of it will be cleaned up
76f4b3ec61Sdh155122  * when the last reference to it is dropped.
77f4b3ec61Sdh155122  * However, the same zone might be rebooted. That is handled using the
78f4b3ec61Sdh155122  * assumption that the zones framework picks a new zoneid each time a zone
79f4b3ec61Sdh155122  * is (re)booted. We assert for that condition in netstack_zone_create().
80f4b3ec61Sdh155122  * Thus the old netstack_t can take its time for things to time out.
81f4b3ec61Sdh155122  */
82f4b3ec61Sdh155122 static netstack_t *netstack_head;
83f4b3ec61Sdh155122 
84f4b3ec61Sdh155122 /*
85f4b3ec61Sdh155122  * To support kstat_create_netstack() using kstat_zone_add we need
86f4b3ec61Sdh155122  * to track both
87f4b3ec61Sdh155122  *  - all zoneids that use the global/shared stack
88f4b3ec61Sdh155122  *  - all kstats that have been added for the shared stack
89f4b3ec61Sdh155122  */
90f4b3ec61Sdh155122 struct shared_zone_list {
91f4b3ec61Sdh155122 	struct shared_zone_list *sz_next;
92f4b3ec61Sdh155122 	zoneid_t		sz_zoneid;
93f4b3ec61Sdh155122 };
94f4b3ec61Sdh155122 
95f4b3ec61Sdh155122 struct shared_kstat_list {
96f4b3ec61Sdh155122 	struct shared_kstat_list *sk_next;
97f4b3ec61Sdh155122 	kstat_t			 *sk_kstat;
98f4b3ec61Sdh155122 };
99f4b3ec61Sdh155122 
100f4b3ec61Sdh155122 static kmutex_t netstack_shared_lock;	/* protects the following two */
101f4b3ec61Sdh155122 static struct shared_zone_list	*netstack_shared_zones;
102f4b3ec61Sdh155122 static struct shared_kstat_list	*netstack_shared_kstats;
103f4b3ec61Sdh155122 
104f4b3ec61Sdh155122 static void	*netstack_zone_create(zoneid_t zoneid);
105f4b3ec61Sdh155122 static void	netstack_zone_shutdown(zoneid_t zoneid, void *arg);
106f4b3ec61Sdh155122 static void	netstack_zone_destroy(zoneid_t zoneid, void *arg);
107f4b3ec61Sdh155122 
108f4b3ec61Sdh155122 static void	netstack_shared_zone_add(zoneid_t zoneid);
109f4b3ec61Sdh155122 static void	netstack_shared_zone_remove(zoneid_t zoneid);
110f4b3ec61Sdh155122 static void	netstack_shared_kstat_add(kstat_t *ks);
111f4b3ec61Sdh155122 static void	netstack_shared_kstat_remove(kstat_t *ks);
112f4b3ec61Sdh155122 
11323f4867fSnordmark typedef boolean_t applyfn_t(kmutex_t *, netstack_t *, int);
114f4b3ec61Sdh155122 
115bd41d0a8Snordmark static void	apply_all_netstacks(int, applyfn_t *);
116bd41d0a8Snordmark static void	apply_all_modules(netstack_t *, applyfn_t *);
117bd41d0a8Snordmark static void	apply_all_modules_reverse(netstack_t *, applyfn_t *);
118bd41d0a8Snordmark static boolean_t netstack_apply_create(kmutex_t *, netstack_t *, int);
119bd41d0a8Snordmark static boolean_t netstack_apply_shutdown(kmutex_t *, netstack_t *, int);
120bd41d0a8Snordmark static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
121bd41d0a8Snordmark static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
122bd41d0a8Snordmark static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
123bd41d0a8Snordmark     kmutex_t *);
124bd41d0a8Snordmark 
125f4b3ec61Sdh155122 void
126f4b3ec61Sdh155122 netstack_init(void)
127f4b3ec61Sdh155122 {
128f4b3ec61Sdh155122 	mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
129f4b3ec61Sdh155122 	mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);
130f4b3ec61Sdh155122 
131f4b3ec61Sdh155122 	netstack_initialized = 1;
132f4b3ec61Sdh155122 
133f4b3ec61Sdh155122 	/*
134f4b3ec61Sdh155122 	 * We want to be informed each time a zone is created or
135f4b3ec61Sdh155122 	 * destroyed in the kernel, so we can maintain the
136f4b3ec61Sdh155122 	 * stack instance information.
137f4b3ec61Sdh155122 	 */
138f4b3ec61Sdh155122 	zone_key_create(&netstack_zone_key, netstack_zone_create,
139f4b3ec61Sdh155122 	    netstack_zone_shutdown, netstack_zone_destroy);
140f4b3ec61Sdh155122 }
141f4b3ec61Sdh155122 
142f4b3ec61Sdh155122 /*
143f4b3ec61Sdh155122  * Register a new module with the framework.
144f4b3ec61Sdh155122  * This registers interest in changes to the set of netstacks.
145f4b3ec61Sdh155122  * The createfn and destroyfn are required, but the shutdownfn can be
146f4b3ec61Sdh155122  * NULL.
147f4b3ec61Sdh155122  * Note that due to the current zsd implementation, when the create
148f4b3ec61Sdh155122  * function is called the zone isn't fully present, thus functions
149f4b3ec61Sdh155122  * like zone_find_by_* will fail, hence the create function can not
150f4b3ec61Sdh155122  * use many zones kernel functions including zcmn_err().
151f4b3ec61Sdh155122  */
152f4b3ec61Sdh155122 void
153f4b3ec61Sdh155122 netstack_register(int moduleid,
154f4b3ec61Sdh155122     void *(*module_create)(netstackid_t, netstack_t *),
155f4b3ec61Sdh155122     void (*module_shutdown)(netstackid_t, void *),
156f4b3ec61Sdh155122     void (*module_destroy)(netstackid_t, void *))
157f4b3ec61Sdh155122 {
158f4b3ec61Sdh155122 	netstack_t *ns;
159f4b3ec61Sdh155122 
160f4b3ec61Sdh155122 	ASSERT(netstack_initialized);
161f4b3ec61Sdh155122 	ASSERT(moduleid >= 0 && moduleid < NS_MAX);
162f4b3ec61Sdh155122 	ASSERT(module_create != NULL);
163f4b3ec61Sdh155122 
164bd41d0a8Snordmark 	/*
165bd41d0a8Snordmark 	 * Make instances created after this point in time run the create
166bd41d0a8Snordmark 	 * callback.
167bd41d0a8Snordmark 	 */
168f4b3ec61Sdh155122 	mutex_enter(&netstack_g_lock);
169f4b3ec61Sdh155122 	ASSERT(ns_reg[moduleid].nr_create == NULL);
170f4b3ec61Sdh155122 	ASSERT(ns_reg[moduleid].nr_flags == 0);
171f4b3ec61Sdh155122 	ns_reg[moduleid].nr_create = module_create;
172f4b3ec61Sdh155122 	ns_reg[moduleid].nr_shutdown = module_shutdown;
173f4b3ec61Sdh155122 	ns_reg[moduleid].nr_destroy = module_destroy;
174f4b3ec61Sdh155122 	ns_reg[moduleid].nr_flags = NRF_REGISTERED;
175f4b3ec61Sdh155122 
176f4b3ec61Sdh155122 	/*
177f4b3ec61Sdh155122 	 * Determine the set of stacks that exist before we drop the lock.
178bd41d0a8Snordmark 	 * Set NSS_CREATE_NEEDED for each of those.
179f4b3ec61Sdh155122 	 * netstacks which have been deleted will have NSS_CREATE_COMPLETED
180f4b3ec61Sdh155122 	 * set, but check NSF_CLOSING to be sure.
181f4b3ec61Sdh155122 	 */
182f4b3ec61Sdh155122 	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
183bd41d0a8Snordmark 		nm_state_t *nms = &ns->netstack_m_state[moduleid];
184bd41d0a8Snordmark 
185f4b3ec61Sdh155122 		mutex_enter(&ns->netstack_lock);
186f4b3ec61Sdh155122 		if (!(ns->netstack_flags & NSF_CLOSING) &&
187bd41d0a8Snordmark 		    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
188bd41d0a8Snordmark 			nms->nms_flags |= NSS_CREATE_NEEDED;
189f4b3ec61Sdh155122 			DTRACE_PROBE2(netstack__create__needed,
190f4b3ec61Sdh155122 			    netstack_t *, ns, int, moduleid);
191f4b3ec61Sdh155122 		}
192f4b3ec61Sdh155122 		mutex_exit(&ns->netstack_lock);
193f4b3ec61Sdh155122 	}
194f4b3ec61Sdh155122 	mutex_exit(&netstack_g_lock);
195f4b3ec61Sdh155122 
196f4b3ec61Sdh155122 	/*
197bd41d0a8Snordmark 	 * At this point in time a new instance can be created or an instance
198bd41d0a8Snordmark 	 * can be destroyed, or some other module can register or unregister.
199bd41d0a8Snordmark 	 * Make sure we either run all the create functions for this moduleid
200bd41d0a8Snordmark 	 * or we wait for any other creators for this moduleid.
201f4b3ec61Sdh155122 	 */
202bd41d0a8Snordmark 	apply_all_netstacks(moduleid, netstack_apply_create);
203f4b3ec61Sdh155122 }
204f4b3ec61Sdh155122 
205f4b3ec61Sdh155122 void
206f4b3ec61Sdh155122 netstack_unregister(int moduleid)
207f4b3ec61Sdh155122 {
208f4b3ec61Sdh155122 	netstack_t *ns;
209f4b3ec61Sdh155122 
210f4b3ec61Sdh155122 	ASSERT(moduleid >= 0 && moduleid < NS_MAX);
211f4b3ec61Sdh155122 
212f4b3ec61Sdh155122 	ASSERT(ns_reg[moduleid].nr_create != NULL);
213f4b3ec61Sdh155122 	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
214f4b3ec61Sdh155122 
215f4b3ec61Sdh155122 	mutex_enter(&netstack_g_lock);
216f4b3ec61Sdh155122 	/*
217f4b3ec61Sdh155122 	 * Determine the set of stacks that exist before we drop the lock.
218bd41d0a8Snordmark 	 * Set NSS_SHUTDOWN_NEEDED and NSS_DESTROY_NEEDED for each of those.
219bd41d0a8Snordmark 	 * That ensures that when we return all the callbacks for existing
220bd41d0a8Snordmark 	 * instances have completed. And since we set NRF_DYING no new
221bd41d0a8Snordmark 	 * instances can use this module.
222f4b3ec61Sdh155122 	 */
223f4b3ec61Sdh155122 	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
224*589efa95SRobert Mustacchi 		boolean_t created = B_FALSE;
225bd41d0a8Snordmark 		nm_state_t *nms = &ns->netstack_m_state[moduleid];
226bd41d0a8Snordmark 
227f4b3ec61Sdh155122 		mutex_enter(&ns->netstack_lock);
228*589efa95SRobert Mustacchi 
229*589efa95SRobert Mustacchi 		/*
230*589efa95SRobert Mustacchi 		 * We need to be careful here. We could actually have a netstack
231*589efa95SRobert Mustacchi 		 * being created as we speak waiting for us to let go of this
232*589efa95SRobert Mustacchi 		 * lock to proceed. It may have set NSS_CREATE_NEEDED, but not
233*589efa95SRobert Mustacchi 		 * have gotten to the point of completing it yet. If
234*589efa95SRobert Mustacchi 		 * NSS_CREATE_NEEDED, we can safely just remove it here and
235*589efa95SRobert Mustacchi 		 * never create the module. However, if NSS_CREATE_INPROGRESS is
236*589efa95SRobert Mustacchi 		 * set, we need to still flag this module for shutdown and
237*589efa95SRobert Mustacchi 		 * deletion, just as though it had reached NSS_CREATE_COMPLETED.
238*589efa95SRobert Mustacchi 		 *
239*589efa95SRobert Mustacchi 		 * It is safe to do that because of two different guarantees
240*589efa95SRobert Mustacchi 		 * that exist in the system. The first is that before we do a
241*589efa95SRobert Mustacchi 		 * create, shutdown, or destroy, we ensure that nothing else is
242*589efa95SRobert Mustacchi 		 * in progress in the system for this netstack and wait for it
243*589efa95SRobert Mustacchi 		 * to complete. Secondly, because the zone is being created, we
244*589efa95SRobert Mustacchi 		 * know that the following call to apply_all_netstack will block
245*589efa95SRobert Mustacchi 		 * on the zone finishing its initialization.
246*589efa95SRobert Mustacchi 		 */
247*589efa95SRobert Mustacchi 		if (nms->nms_flags & NSS_CREATE_NEEDED)
248*589efa95SRobert Mustacchi 			nms->nms_flags &= ~NSS_CREATE_NEEDED;
249*589efa95SRobert Mustacchi 
250*589efa95SRobert Mustacchi 		if (nms->nms_flags & NSS_CREATE_INPROGRESS ||
251*589efa95SRobert Mustacchi 		    nms->nms_flags & NSS_CREATE_COMPLETED)
252*589efa95SRobert Mustacchi 			created = B_TRUE;
253*589efa95SRobert Mustacchi 
254*589efa95SRobert Mustacchi 		if (ns_reg[moduleid].nr_shutdown != NULL && created &&
255bd41d0a8Snordmark 		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
256bd41d0a8Snordmark 		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
257bd41d0a8Snordmark 			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
258f4b3ec61Sdh155122 			DTRACE_PROBE2(netstack__shutdown__needed,
259f4b3ec61Sdh155122 			    netstack_t *, ns, int, moduleid);
260f4b3ec61Sdh155122 		}
261f4b3ec61Sdh155122 		if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) &&
262*589efa95SRobert Mustacchi 		    ns_reg[moduleid].nr_destroy != NULL && created &&
263bd41d0a8Snordmark 		    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
264bd41d0a8Snordmark 			nms->nms_flags |= NSS_DESTROY_NEEDED;
265f4b3ec61Sdh155122 			DTRACE_PROBE2(netstack__destroy__needed,
266f4b3ec61Sdh155122 			    netstack_t *, ns, int, moduleid);
267f4b3ec61Sdh155122 		}
268f4b3ec61Sdh155122 		mutex_exit(&ns->netstack_lock);
269f4b3ec61Sdh155122 	}
270bd41d0a8Snordmark 	/*
271bd41d0a8Snordmark 	 * Prevent any new netstack from calling the registered create
272bd41d0a8Snordmark 	 * function, while keeping the function pointers in place until the
273bd41d0a8Snordmark 	 * shutdown and destroy callbacks are complete.
274bd41d0a8Snordmark 	 */
275bd41d0a8Snordmark 	ns_reg[moduleid].nr_flags |= NRF_DYING;
276f4b3ec61Sdh155122 	mutex_exit(&netstack_g_lock);
277f4b3ec61Sdh155122 
278bd41d0a8Snordmark 	apply_all_netstacks(moduleid, netstack_apply_shutdown);
279bd41d0a8Snordmark 	apply_all_netstacks(moduleid, netstack_apply_destroy);
280f4b3ec61Sdh155122 
281f4b3ec61Sdh155122 	/*
282bd41d0a8Snordmark 	 * Clear the nms_flags so that we can handle this module
283f4b3ec61Sdh155122 	 * being loaded again.
284bd41d0a8Snordmark 	 * Also remove the registered functions.
285f4b3ec61Sdh155122 	 */
286f4b3ec61Sdh155122 	mutex_enter(&netstack_g_lock);
287bd41d0a8Snordmark 	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
288bd41d0a8Snordmark 	ASSERT(ns_reg[moduleid].nr_flags & NRF_DYING);
289f4b3ec61Sdh155122 	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
290bd41d0a8Snordmark 		nm_state_t *nms = &ns->netstack_m_state[moduleid];
291bd41d0a8Snordmark 
292f4b3ec61Sdh155122 		mutex_enter(&ns->netstack_lock);
293bd41d0a8Snordmark 		if (nms->nms_flags & NSS_DESTROY_COMPLETED) {
294bd41d0a8Snordmark 			nms->nms_flags = 0;
295f4b3ec61Sdh155122 			DTRACE_PROBE2(netstack__destroy__done,
296f4b3ec61Sdh155122 			    netstack_t *, ns, int, moduleid);
297f4b3ec61Sdh155122 		}
298f4b3ec61Sdh155122 		mutex_exit(&ns->netstack_lock);
299f4b3ec61Sdh155122 	}
300f4b3ec61Sdh155122 
301f4b3ec61Sdh155122 	ns_reg[moduleid].nr_create = NULL;
302f4b3ec61Sdh155122 	ns_reg[moduleid].nr_shutdown = NULL;
303f4b3ec61Sdh155122 	ns_reg[moduleid].nr_destroy = NULL;
304f4b3ec61Sdh155122 	ns_reg[moduleid].nr_flags = 0;
305f4b3ec61Sdh155122 	mutex_exit(&netstack_g_lock);
306f4b3ec61Sdh155122 }
307f4b3ec61Sdh155122 
308f4b3ec61Sdh155122 /*
309f4b3ec61Sdh155122  * Lookup and/or allocate a netstack for this zone.
310f4b3ec61Sdh155122  */
311f4b3ec61Sdh155122 static void *
312f4b3ec61Sdh155122 netstack_zone_create(zoneid_t zoneid)
313f4b3ec61Sdh155122 {
314f4b3ec61Sdh155122 	netstackid_t stackid;
315f4b3ec61Sdh155122 	netstack_t *ns;
316f4b3ec61Sdh155122 	netstack_t **nsp;
317f4b3ec61Sdh155122 	zone_t	*zone;
318f4b3ec61Sdh155122 	int i;
319f4b3ec61Sdh155122 
320f4b3ec61Sdh155122 	ASSERT(netstack_initialized);
321f4b3ec61Sdh155122 
322f4b3ec61Sdh155122 	zone = zone_find_by_id_nolock(zoneid);
323f4b3ec61Sdh155122 	ASSERT(zone != NULL);
324f4b3ec61Sdh155122 
325f4b3ec61Sdh155122 	if (zone->zone_flags & ZF_NET_EXCL) {
326f4b3ec61Sdh155122 		stackid = zoneid;
327f4b3ec61Sdh155122 	} else {
328f4b3ec61Sdh155122 		/* Look for the stack instance for the global */
329f4b3ec61Sdh155122 		stackid = GLOBAL_NETSTACKID;
330f4b3ec61Sdh155122 	}
331f4b3ec61Sdh155122 
332f4b3ec61Sdh155122 	/* Allocate even if it isn't needed; simplifies locking */
333f4b3ec61Sdh155122 	ns = (netstack_t *)kmem_zalloc(sizeof (netstack_t), KM_SLEEP);
334f4b3ec61Sdh155122 
335f4b3ec61Sdh155122 	/* Look if there is a matching stack instance */
336f4b3ec61Sdh155122 	mutex_enter(&netstack_g_lock);
337f4b3ec61Sdh155122 	for (nsp = &netstack_head; *nsp != NULL;
338f4b3ec61Sdh155122 	    nsp = &((*nsp)->netstack_next)) {
339f4b3ec61Sdh155122 		if ((*nsp)->netstack_stackid == stackid) {
340f4b3ec61Sdh155122 			/*
341f4b3ec61Sdh155122 			 * Should never find a pre-existing exclusive stack
342f4b3ec61Sdh155122 			 */
343f4b3ec61Sdh155122 			ASSERT(stackid == GLOBAL_NETSTACKID);
344f4b3ec61Sdh155122 			kmem_free(ns, sizeof (netstack_t));
345f4b3ec61Sdh155122 			ns = *nsp;
346f4b3ec61Sdh155122 			mutex_enter(&ns->netstack_lock);
347f4b3ec61Sdh155122 			ns->netstack_numzones++;
348f4b3ec61Sdh155122 			mutex_exit(&ns->netstack_lock);
349f4b3ec61Sdh155122 			mutex_exit(&netstack_g_lock);
350f4b3ec61Sdh155122 			DTRACE_PROBE1(netstack__inc__numzones,
351f4b3ec61Sdh155122 			    netstack_t *, ns);
352f4b3ec61Sdh155122 			/* Record that we have a new shared stack zone */
353f4b3ec61Sdh155122 			netstack_shared_zone_add(zoneid);
354f4b3ec61Sdh155122 			zone->zone_netstack = ns;
355f4b3ec61Sdh155122 			return (ns);
356f4b3ec61Sdh155122 		}
357f4b3ec61Sdh155122 	}
358f4b3ec61Sdh155122 	/* Not found */
359f4b3ec61Sdh155122 	mutex_init(&ns->netstack_lock, NULL, MUTEX_DEFAULT, NULL);
360bd41d0a8Snordmark 	cv_init(&ns->netstack_cv, NULL, CV_DEFAULT, NULL);
361f4b3ec61Sdh155122 	ns->netstack_stackid = zoneid;
362f4b3ec61Sdh155122 	ns->netstack_numzones = 1;
363f4b3ec61Sdh155122 	ns->netstack_refcnt = 1; /* Decremented by netstack_zone_destroy */
364f4b3ec61Sdh155122 	ns->netstack_flags = NSF_UNINIT;
365f4b3ec61Sdh155122 	*nsp = ns;
366f4b3ec61Sdh155122 	zone->zone_netstack = ns;
367f4b3ec61Sdh155122 
368bd41d0a8Snordmark 	mutex_enter(&ns->netstack_lock);
369bd41d0a8Snordmark 	/*
370bd41d0a8Snordmark 	 * Mark this netstack as having a CREATE running so
371bd41d0a8Snordmark 	 * any netstack_register/netstack_unregister waits for
372bd41d0a8Snordmark 	 * the existing create callbacks to complete in moduleid order
373bd41d0a8Snordmark 	 */
374bd41d0a8Snordmark 	ns->netstack_flags |= NSF_ZONE_CREATE;
375bd41d0a8Snordmark 
376f4b3ec61Sdh155122 	/*
377f4b3ec61Sdh155122 	 * Determine the set of module create functions that need to be
378f4b3ec61Sdh155122 	 * called before we drop the lock.
379bd41d0a8Snordmark 	 * Set NSS_CREATE_NEEDED for each of those.
380bd41d0a8Snordmark 	 * Skip any with NRF_DYING set, since those are in the process of
381bd41d0a8Snordmark 	 * going away, by checking for flags being exactly NRF_REGISTERED.
382f4b3ec61Sdh155122 	 */
383f4b3ec61Sdh155122 	for (i = 0; i < NS_MAX; i++) {
384bd41d0a8Snordmark 		nm_state_t *nms = &ns->netstack_m_state[i];
385bd41d0a8Snordmark 
386bd41d0a8Snordmark 		cv_init(&nms->nms_cv, NULL, CV_DEFAULT, NULL);
387bd41d0a8Snordmark 
388bd41d0a8Snordmark 		if ((ns_reg[i].nr_flags == NRF_REGISTERED) &&
389bd41d0a8Snordmark 		    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
390bd41d0a8Snordmark 			nms->nms_flags |= NSS_CREATE_NEEDED;
391f4b3ec61Sdh155122 			DTRACE_PROBE2(netstack__create__needed,
392f4b3ec61Sdh155122 			    netstack_t *, ns, int, i);
393f4b3ec61Sdh155122 		}
394f4b3ec61Sdh155122 	}
395bd41d0a8Snordmark 	mutex_exit(&ns->netstack_lock);
396f4b3ec61Sdh155122 	mutex_exit(&netstack_g_lock);
397f4b3ec61Sdh155122 
398bd41d0a8Snordmark 	apply_all_modules(ns, netstack_apply_create);
399f4b3ec61Sdh155122 
400bd41d0a8Snordmark 	/* Tell any waiting netstack_register/netstack_unregister to proceed */
401f4b3ec61Sdh155122 	mutex_enter(&ns->netstack_lock);
402f4b3ec61Sdh155122 	ns->netstack_flags &= ~NSF_UNINIT;
403bd41d0a8Snordmark 	ASSERT(ns->netstack_flags & NSF_ZONE_CREATE);
404bd41d0a8Snordmark 	ns->netstack_flags &= ~NSF_ZONE_CREATE;
405bd41d0a8Snordmark 	cv_broadcast(&ns->netstack_cv);
406f4b3ec61Sdh155122 	mutex_exit(&ns->netstack_lock);
407f4b3ec61Sdh155122 
408f4b3ec61Sdh155122 	return (ns);
409f4b3ec61Sdh155122 }
410f4b3ec61Sdh155122 
411f4b3ec61Sdh155122 /* ARGSUSED */
412f4b3ec61Sdh155122 static void
413f4b3ec61Sdh155122 netstack_zone_shutdown(zoneid_t zoneid, void *arg)
414f4b3ec61Sdh155122 {
415f4b3ec61Sdh155122 	netstack_t *ns = (netstack_t *)arg;
416f4b3ec61Sdh155122 	int i;
417f4b3ec61Sdh155122 
418f4b3ec61Sdh155122 	ASSERT(arg != NULL);
419f4b3ec61Sdh155122 
420f4b3ec61Sdh155122 	mutex_enter(&ns->netstack_lock);
421f4b3ec61Sdh155122 	ASSERT(ns->netstack_numzones > 0);
422f4b3ec61Sdh155122 	if (ns->netstack_numzones != 1) {
423f4b3ec61Sdh155122 		/* Stack instance being used by other zone */
424f4b3ec61Sdh155122 		mutex_exit(&ns->netstack_lock);
425f4b3ec61Sdh155122 		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
426f4b3ec61Sdh155122 		return;
427f4b3ec61Sdh155122 	}
428f4b3ec61Sdh155122 	mutex_exit(&ns->netstack_lock);
429f4b3ec61Sdh155122 
430f4b3ec61Sdh155122 	mutex_enter(&netstack_g_lock);
431bd41d0a8Snordmark 	mutex_enter(&ns->netstack_lock);
432bd41d0a8Snordmark 	/*
433bd41d0a8Snordmark 	 * Mark this netstack as having a SHUTDOWN running so
434bd41d0a8Snordmark 	 * any netstack_register/netstack_unregister waits for
435bd41d0a8Snordmark 	 * the existing create callbacks to complete in moduleid order
436bd41d0a8Snordmark 	 */
437bd41d0a8Snordmark 	ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
438bd41d0a8Snordmark 	ns->netstack_flags |= NSF_ZONE_SHUTDOWN;
439bd41d0a8Snordmark 
440f4b3ec61Sdh155122 	/*
441f4b3ec61Sdh155122 	 * Determine the set of stacks that exist before we drop the lock.
442bd41d0a8Snordmark 	 * Set NSS_SHUTDOWN_NEEDED for each of those.
443f4b3ec61Sdh155122 	 */
444f4b3ec61Sdh155122 	for (i = 0; i < NS_MAX; i++) {
445bd41d0a8Snordmark 		nm_state_t *nms = &ns->netstack_m_state[i];
446bd41d0a8Snordmark 
447f4b3ec61Sdh155122 		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
448f4b3ec61Sdh155122 		    ns_reg[i].nr_shutdown != NULL &&
449bd41d0a8Snordmark 		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
450bd41d0a8Snordmark 		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
451bd41d0a8Snordmark 			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
452f4b3ec61Sdh155122 			DTRACE_PROBE2(netstack__shutdown__needed,
453f4b3ec61Sdh155122 			    netstack_t *, ns, int, i);
454f4b3ec61Sdh155122 		}
455f4b3ec61Sdh155122 	}
456bd41d0a8Snordmark 	mutex_exit(&ns->netstack_lock);
457f4b3ec61Sdh155122 	mutex_exit(&netstack_g_lock);
458f4b3ec61Sdh155122 
45923f4867fSnordmark 	/*
46023f4867fSnordmark 	 * Call the shutdown function for all registered modules for this
46123f4867fSnordmark 	 * netstack.
46223f4867fSnordmark 	 */
4637ddc9b1aSDarren Reed 	apply_all_modules_reverse(ns, netstack_apply_shutdown);
464bd41d0a8Snordmark 
465bd41d0a8Snordmark 	/* Tell any waiting netstack_register/netstack_unregister to proceed */
466bd41d0a8Snordmark 	mutex_enter(&ns->netstack_lock);
467bd41d0a8Snordmark 	ASSERT(ns->netstack_flags & NSF_ZONE_SHUTDOWN);
468bd41d0a8Snordmark 	ns->netstack_flags &= ~NSF_ZONE_SHUTDOWN;
469bd41d0a8Snordmark 	cv_broadcast(&ns->netstack_cv);
470bd41d0a8Snordmark 	mutex_exit(&ns->netstack_lock);
471f4b3ec61Sdh155122 }
472f4b3ec61Sdh155122 
473f4b3ec61Sdh155122 /*
474f4b3ec61Sdh155122  * Common routine to release a zone.
475f4b3ec61Sdh155122  * If this was the last zone using the stack instance then prepare to
476f4b3ec61Sdh155122  * have the refcnt dropping to zero free the zone.
477f4b3ec61Sdh155122  */
478f4b3ec61Sdh155122 /* ARGSUSED */
479f4b3ec61Sdh155122 static void
480f4b3ec61Sdh155122 netstack_zone_destroy(zoneid_t zoneid, void *arg)
481f4b3ec61Sdh155122 {
482f4b3ec61Sdh155122 	netstack_t *ns = (netstack_t *)arg;
483f4b3ec61Sdh155122 
484f4b3ec61Sdh155122 	ASSERT(arg != NULL);
485f4b3ec61Sdh155122 
486f4b3ec61Sdh155122 	mutex_enter(&ns->netstack_lock);
487f4b3ec61Sdh155122 	ASSERT(ns->netstack_numzones > 0);
488f4b3ec61Sdh155122 	ns->netstack_numzones--;
489f4b3ec61Sdh155122 	if (ns->netstack_numzones != 0) {
490f4b3ec61Sdh155122 		/* Stack instance being used by other zone */
491f4b3ec61Sdh155122 		mutex_exit(&ns->netstack_lock);
492f4b3ec61Sdh155122 		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
493f4b3ec61Sdh155122 		/* Record that we a shared stack zone has gone away */
494f4b3ec61Sdh155122 		netstack_shared_zone_remove(zoneid);
495f4b3ec61Sdh155122 		return;
496f4b3ec61Sdh155122 	}
497f4b3ec61Sdh155122 	/*
49823f4867fSnordmark 	 * Set CLOSING so that netstack_find_by will not find it.
499f4b3ec61Sdh155122 	 */
500f4b3ec61Sdh155122 	ns->netstack_flags |= NSF_CLOSING;
501f4b3ec61Sdh155122 	mutex_exit(&ns->netstack_lock);
502f4b3ec61Sdh155122 	DTRACE_PROBE1(netstack__dec__numzones, netstack_t *, ns);
503f4b3ec61Sdh155122 	/* No other thread can call zone_destroy for this stack */
504f4b3ec61Sdh155122 
505f4b3ec61Sdh155122 	/*
506f4b3ec61Sdh155122 	 * Decrease refcnt to account for the one in netstack_zone_init()
507f4b3ec61Sdh155122 	 */
508f4b3ec61Sdh155122 	netstack_rele(ns);
509f4b3ec61Sdh155122 }
510f4b3ec61Sdh155122 
511f4b3ec61Sdh155122 /*
512f4b3ec61Sdh155122  * Called when the reference count drops to zero.
513f4b3ec61Sdh155122  * Call the destroy functions for each registered module.
514f4b3ec61Sdh155122  */
515f4b3ec61Sdh155122 static void
516f4b3ec61Sdh155122 netstack_stack_inactive(netstack_t *ns)
517f4b3ec61Sdh155122 {
518f4b3ec61Sdh155122 	int i;
519f4b3ec61Sdh155122 
520f4b3ec61Sdh155122 	mutex_enter(&netstack_g_lock);
521bd41d0a8Snordmark 	mutex_enter(&ns->netstack_lock);
522bd41d0a8Snordmark 	/*
523bd41d0a8Snordmark 	 * Mark this netstack as having a DESTROY running so
524bd41d0a8Snordmark 	 * any netstack_register/netstack_unregister waits for
525bd41d0a8Snordmark 	 * the existing destroy callbacks to complete in reverse moduleid order
526bd41d0a8Snordmark 	 */
527bd41d0a8Snordmark 	ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
528bd41d0a8Snordmark 	ns->netstack_flags |= NSF_ZONE_DESTROY;
529f4b3ec61Sdh155122 	/*
530f4b3ec61Sdh155122 	 * If the shutdown callback wasn't called earlier (e.g., if this is
531bd41d0a8Snordmark 	 * a netstack shared between multiple zones), then we schedule it now.
532bd41d0a8Snordmark 	 *
533bd41d0a8Snordmark 	 * Determine the set of stacks that exist before we drop the lock.
534bd41d0a8Snordmark 	 * Set NSS_DESTROY_NEEDED for each of those. That
535bd41d0a8Snordmark 	 * ensures that when we return all the callbacks for existing
536bd41d0a8Snordmark 	 * instances have completed.
537f4b3ec61Sdh155122 	 */
538f4b3ec61Sdh155122 	for (i = 0; i < NS_MAX; i++) {
539bd41d0a8Snordmark 		nm_state_t *nms = &ns->netstack_m_state[i];
540bd41d0a8Snordmark 
541f4b3ec61Sdh155122 		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
542f4b3ec61Sdh155122 		    ns_reg[i].nr_shutdown != NULL &&
543bd41d0a8Snordmark 		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
544bd41d0a8Snordmark 		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
545bd41d0a8Snordmark 			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
546f4b3ec61Sdh155122 			DTRACE_PROBE2(netstack__shutdown__needed,
547f4b3ec61Sdh155122 			    netstack_t *, ns, int, i);
548f4b3ec61Sdh155122 		}
549bd41d0a8Snordmark 
550f4b3ec61Sdh155122 		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
551f4b3ec61Sdh155122 		    ns_reg[i].nr_destroy != NULL &&
552bd41d0a8Snordmark 		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
553bd41d0a8Snordmark 		    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
554bd41d0a8Snordmark 			nms->nms_flags |= NSS_DESTROY_NEEDED;
555f4b3ec61Sdh155122 			DTRACE_PROBE2(netstack__destroy__needed,
556f4b3ec61Sdh155122 			    netstack_t *, ns, int, i);
557f4b3ec61Sdh155122 		}
558f4b3ec61Sdh155122 	}
559bd41d0a8Snordmark 	mutex_exit(&ns->netstack_lock);
560f4b3ec61Sdh155122 	mutex_exit(&netstack_g_lock);
561f4b3ec61Sdh155122 
56223f4867fSnordmark 	/*
56323f4867fSnordmark 	 * Call the shutdown and destroy functions for all registered modules
56423f4867fSnordmark 	 * for this netstack.
565bd41d0a8Snordmark 	 *
566bd41d0a8Snordmark 	 * Since there are some ordering dependencies between the modules we
567bd41d0a8Snordmark 	 * tear them down in the reverse order of what was used to create them.
568bd41d0a8Snordmark 	 *
569bd41d0a8Snordmark 	 * Since a netstack_t is never reused (when a zone is rebooted it gets
570bd41d0a8Snordmark 	 * a new zoneid == netstackid i.e. a new netstack_t is allocated) we
571bd41d0a8Snordmark 	 * leave nms_flags the way it is i.e. with NSS_DESTROY_COMPLETED set.
572bd41d0a8Snordmark 	 * That is different than in the netstack_unregister() case.
57323f4867fSnordmark 	 */
5747ddc9b1aSDarren Reed 	apply_all_modules_reverse(ns, netstack_apply_shutdown);
575bd41d0a8Snordmark 	apply_all_modules_reverse(ns, netstack_apply_destroy);
576f4b3ec61Sdh155122 
577bd41d0a8Snordmark 	/* Tell any waiting netstack_register/netstack_unregister to proceed */
578f4b3ec61Sdh155122 	mutex_enter(&ns->netstack_lock);
579bd41d0a8Snordmark 	ASSERT(ns->netstack_flags & NSF_ZONE_DESTROY);
580bd41d0a8Snordmark 	ns->netstack_flags &= ~NSF_ZONE_DESTROY;
581bd41d0a8Snordmark 	cv_broadcast(&ns->netstack_cv);
582f4b3ec61Sdh155122 	mutex_exit(&ns->netstack_lock);
583f4b3ec61Sdh155122 }
584f4b3ec61Sdh155122 
58523f4867fSnordmark /*
58623f4867fSnordmark  * Apply a function to all netstacks for a particular moduleid.
58723f4867fSnordmark  *
588bd41d0a8Snordmark  * If there is any zone activity (due to a zone being created, shutdown,
589bd41d0a8Snordmark  * or destroyed) we wait for that to complete before we proceed. This ensures
590bd41d0a8Snordmark  * that the moduleids are processed in order when a zone is created or
591bd41d0a8Snordmark  * destroyed.
592bd41d0a8Snordmark  *
59323f4867fSnordmark  * The applyfn has to drop netstack_g_lock if it does some work.
594bd41d0a8Snordmark  * In that case we don't follow netstack_next,
595bd41d0a8Snordmark  * even if it is possible to do so without any hazards. This is
59623f4867fSnordmark  * because we want the design to allow for the list of netstacks threaded
59723f4867fSnordmark  * by netstack_next to change in any arbitrary way during the time the
59823f4867fSnordmark  * lock was dropped.
59923f4867fSnordmark  *
60023f4867fSnordmark  * It is safe to restart the loop at netstack_head since the applyfn
60123f4867fSnordmark  * changes netstack_m_state as it processes things, so a subsequent
60223f4867fSnordmark  * pass through will have no effect in applyfn, hence the loop will terminate
60323f4867fSnordmark  * in at worst O(N^2).
60423f4867fSnordmark  */
605f4b3ec61Sdh155122 static void
60623f4867fSnordmark apply_all_netstacks(int moduleid, applyfn_t *applyfn)
607f4b3ec61Sdh155122 {
608f4b3ec61Sdh155122 	netstack_t *ns;
609f4b3ec61Sdh155122 
61023f4867fSnordmark 	mutex_enter(&netstack_g_lock);
61123f4867fSnordmark 	ns = netstack_head;
612f4b3ec61Sdh155122 	while (ns != NULL) {
613bd41d0a8Snordmark 		if (wait_for_zone_creator(ns, &netstack_g_lock)) {
61423f4867fSnordmark 			/* Lock dropped - restart at head */
615bd41d0a8Snordmark 			ns = netstack_head;
616bd41d0a8Snordmark 		} else if ((applyfn)(&netstack_g_lock, ns, moduleid)) {
617bd41d0a8Snordmark 			/* Lock dropped - restart at head */
61823f4867fSnordmark 			ns = netstack_head;
61923f4867fSnordmark 		} else {
62023f4867fSnordmark 			ns = ns->netstack_next;
62123f4867fSnordmark 		}
62223f4867fSnordmark 	}
62323f4867fSnordmark 	mutex_exit(&netstack_g_lock);
62423f4867fSnordmark }
62523f4867fSnordmark 
62623f4867fSnordmark /*
62723f4867fSnordmark  * Apply a function to all moduleids for a particular netstack.
62823f4867fSnordmark  *
62923f4867fSnordmark  * Since the netstack linkage doesn't matter in this case we can
63023f4867fSnordmark  * ignore whether the function drops the lock.
63123f4867fSnordmark  */
63223f4867fSnordmark static void
63323f4867fSnordmark apply_all_modules(netstack_t *ns, applyfn_t *applyfn)
63423f4867fSnordmark {
63523f4867fSnordmark 	int i;
63623f4867fSnordmark 
63723f4867fSnordmark 	mutex_enter(&netstack_g_lock);
638f4b3ec61Sdh155122 	for (i = 0; i < NS_MAX; i++) {
639f4b3ec61Sdh155122 		/*
640bd41d0a8Snordmark 		 * We don't care whether the lock was dropped
641bd41d0a8Snordmark 		 * since we are not iterating over netstack_head.
642f4b3ec61Sdh155122 		 */
643bd41d0a8Snordmark 		(void) (applyfn)(&netstack_g_lock, ns, i);
644f4b3ec61Sdh155122 	}
64523f4867fSnordmark 	mutex_exit(&netstack_g_lock);
646f4b3ec61Sdh155122 }
647f4b3ec61Sdh155122 
64823f4867fSnordmark /* Like the above but in reverse moduleid order */
649f4b3ec61Sdh155122 static void
65023f4867fSnordmark apply_all_modules_reverse(netstack_t *ns, applyfn_t *applyfn)
651f4b3ec61Sdh155122 {
652f4b3ec61Sdh155122 	int i;
653f4b3ec61Sdh155122 
65423f4867fSnordmark 	mutex_enter(&netstack_g_lock);
655f4b3ec61Sdh155122 	for (i = NS_MAX-1; i >= 0; i--) {
656f4b3ec61Sdh155122 		/*
657bd41d0a8Snordmark 		 * We don't care whether the lock was dropped
658bd41d0a8Snordmark 		 * since we are not iterating over netstack_head.
659f4b3ec61Sdh155122 		 */
660bd41d0a8Snordmark 		(void) (applyfn)(&netstack_g_lock, ns, i);
661f4b3ec61Sdh155122 	}
66223f4867fSnordmark 	mutex_exit(&netstack_g_lock);
663f4b3ec61Sdh155122 }
664f4b3ec61Sdh155122 
665f4b3ec61Sdh155122 /*
666bd41d0a8Snordmark  * Call the create function for the ns and moduleid if CREATE_NEEDED
667bd41d0a8Snordmark  * is set.
668bd41d0a8Snordmark  * If some other thread gets here first and sets *_INPROGRESS, then
669bd41d0a8Snordmark  * we wait for that thread to complete so that we can ensure that
670bd41d0a8Snordmark  * all the callbacks are done when we've looped over all netstacks/moduleids.
67123f4867fSnordmark  *
672bd41d0a8Snordmark  * When we call the create function, we temporarily drop the netstack_lock
673bd41d0a8Snordmark  * held by the caller, and return true to tell the caller it needs to
674bd41d0a8Snordmark  * re-evalute the state.
675f4b3ec61Sdh155122  */
676bd41d0a8Snordmark static boolean_t
677bd41d0a8Snordmark netstack_apply_create(kmutex_t *lockp, netstack_t *ns, int moduleid)
678f4b3ec61Sdh155122 {
679bd41d0a8Snordmark 	void *result;
680bd41d0a8Snordmark 	netstackid_t stackid;
681bd41d0a8Snordmark 	nm_state_t *nms = &ns->netstack_m_state[moduleid];
682bd41d0a8Snordmark 	boolean_t dropped = B_FALSE;
683bd41d0a8Snordmark 
684bd41d0a8Snordmark 	ASSERT(MUTEX_HELD(lockp));
685bd41d0a8Snordmark 	mutex_enter(&ns->netstack_lock);
686bd41d0a8Snordmark 
687bd41d0a8Snordmark 	if (wait_for_nms_inprogress(ns, nms, lockp))
688bd41d0a8Snordmark 		dropped = B_TRUE;
689bd41d0a8Snordmark 
690bd41d0a8Snordmark 	if (nms->nms_flags & NSS_CREATE_NEEDED) {
691bd41d0a8Snordmark 		nms->nms_flags &= ~NSS_CREATE_NEEDED;
692bd41d0a8Snordmark 		nms->nms_flags |= NSS_CREATE_INPROGRESS;
693bd41d0a8Snordmark 		DTRACE_PROBE2(netstack__create__inprogress,
694bd41d0a8Snordmark 		    netstack_t *, ns, int, moduleid);
695bd41d0a8Snordmark 		mutex_exit(&ns->netstack_lock);
696bd41d0a8Snordmark 		mutex_exit(lockp);
697bd41d0a8Snordmark 		dropped = B_TRUE;
698bd41d0a8Snordmark 
699bd41d0a8Snordmark 		ASSERT(ns_reg[moduleid].nr_create != NULL);
700bd41d0a8Snordmark 		stackid = ns->netstack_stackid;
701bd41d0a8Snordmark 		DTRACE_PROBE2(netstack__create__start,
702bd41d0a8Snordmark 		    netstackid_t, stackid,
703bd41d0a8Snordmark 		    netstack_t *, ns);
704bd41d0a8Snordmark 		result = (ns_reg[moduleid].nr_create)(stackid, ns);
705bd41d0a8Snordmark 		DTRACE_PROBE2(netstack__create__end,
706bd41d0a8Snordmark 		    void *, result, netstack_t *, ns);
707bd41d0a8Snordmark 
708bd41d0a8Snordmark 		ASSERT(result != NULL);
709bd41d0a8Snordmark 		mutex_enter(lockp);
710bd41d0a8Snordmark 		mutex_enter(&ns->netstack_lock);
711bd41d0a8Snordmark 		ns->netstack_modules[moduleid] = result;
712bd41d0a8Snordmark 		nms->nms_flags &= ~NSS_CREATE_INPROGRESS;
713bd41d0a8Snordmark 		nms->nms_flags |= NSS_CREATE_COMPLETED;
714bd41d0a8Snordmark 		cv_broadcast(&nms->nms_cv);
715bd41d0a8Snordmark 		DTRACE_PROBE2(netstack__create__completed,
716bd41d0a8Snordmark 		    netstack_t *, ns, int, moduleid);
717bd41d0a8Snordmark 		mutex_exit(&ns->netstack_lock);
718bd41d0a8Snordmark 		return (dropped);
71923f4867fSnordmark 	} else {
720bd41d0a8Snordmark 		mutex_exit(&ns->netstack_lock);
721bd41d0a8Snordmark 		return (dropped);
72223f4867fSnordmark 	}
723f4b3ec61Sdh155122 }
724f4b3ec61Sdh155122 
725f4b3ec61Sdh155122 /*
726bd41d0a8Snordmark  * Call the shutdown function for the ns and moduleid if SHUTDOWN_NEEDED
727bd41d0a8Snordmark  * is set.
728bd41d0a8Snordmark  * If some other thread gets here first and sets *_INPROGRESS, then
729bd41d0a8Snordmark  * we wait for that thread to complete so that we can ensure that
730bd41d0a8Snordmark  * all the callbacks are done when we've looped over all netstacks/moduleids.
731f4b3ec61Sdh155122  *
732bd41d0a8Snordmark  * When we call the shutdown function, we temporarily drop the netstack_lock
733bd41d0a8Snordmark  * held by the caller, and return true to tell the caller it needs to
734bd41d0a8Snordmark  * re-evalute the state.
735f4b3ec61Sdh155122  */
736bd41d0a8Snordmark static boolean_t
737bd41d0a8Snordmark netstack_apply_shutdown(kmutex_t *lockp, netstack_t *ns, int moduleid)
738f4b3ec61Sdh155122 {
739bd41d0a8Snordmark 	netstackid_t stackid;
740bd41d0a8Snordmark 	void * netstack_module;
741bd41d0a8Snordmark 	nm_state_t *nms = &ns->netstack_m_state[moduleid];
742bd41d0a8Snordmark 	boolean_t dropped = B_FALSE;
743bd41d0a8Snordmark 
744bd41d0a8Snordmark 	ASSERT(MUTEX_HELD(lockp));
745bd41d0a8Snordmark 	mutex_enter(&ns->netstack_lock);
746bd41d0a8Snordmark 
747bd41d0a8Snordmark 	if (wait_for_nms_inprogress(ns, nms, lockp))
748bd41d0a8Snordmark 		dropped = B_TRUE;
749bd41d0a8Snordmark 
750bd41d0a8Snordmark 	if (nms->nms_flags & NSS_SHUTDOWN_NEEDED) {
751bd41d0a8Snordmark 		nms->nms_flags &= ~NSS_SHUTDOWN_NEEDED;
752bd41d0a8Snordmark 		nms->nms_flags |= NSS_SHUTDOWN_INPROGRESS;
753bd41d0a8Snordmark 		DTRACE_PROBE2(netstack__shutdown__inprogress,
754bd41d0a8Snordmark 		    netstack_t *, ns, int, moduleid);
755bd41d0a8Snordmark 		mutex_exit(&ns->netstack_lock);
756bd41d0a8Snordmark 		mutex_exit(lockp);
757bd41d0a8Snordmark 		dropped = B_TRUE;
758bd41d0a8Snordmark 
759bd41d0a8Snordmark 		ASSERT(ns_reg[moduleid].nr_shutdown != NULL);
760bd41d0a8Snordmark 		stackid = ns->netstack_stackid;
761bd41d0a8Snordmark 		netstack_module = ns->netstack_modules[moduleid];
762bd41d0a8Snordmark 		DTRACE_PROBE2(netstack__shutdown__start,
763bd41d0a8Snordmark 		    netstackid_t, stackid,
764bd41d0a8Snordmark 		    void *, netstack_module);
765bd41d0a8Snordmark 		(ns_reg[moduleid].nr_shutdown)(stackid, netstack_module);
766bd41d0a8Snordmark 		DTRACE_PROBE1(netstack__shutdown__end,
767bd41d0a8Snordmark 		    netstack_t *, ns);
768bd41d0a8Snordmark 
769bd41d0a8Snordmark 		mutex_enter(lockp);
770bd41d0a8Snordmark 		mutex_enter(&ns->netstack_lock);
771bd41d0a8Snordmark 		nms->nms_flags &= ~NSS_SHUTDOWN_INPROGRESS;
772bd41d0a8Snordmark 		nms->nms_flags |= NSS_SHUTDOWN_COMPLETED;
773bd41d0a8Snordmark 		cv_broadcast(&nms->nms_cv);
774bd41d0a8Snordmark 		DTRACE_PROBE2(netstack__shutdown__completed,
775bd41d0a8Snordmark 		    netstack_t *, ns, int, moduleid);
776bd41d0a8Snordmark 		mutex_exit(&ns->netstack_lock);
777bd41d0a8Snordmark 		return (dropped);
778bd41d0a8Snordmark 	} else {
779bd41d0a8Snordmark 		mutex_exit(&ns->netstack_lock);
780bd41d0a8Snordmark 		return (dropped);
781bd41d0a8Snordmark 	}
782f4b3ec61Sdh155122 }
783f4b3ec61Sdh155122 
784f4b3ec61Sdh155122 /*
785bd41d0a8Snordmark  * Call the destroy function for the ns and moduleid if DESTROY_NEEDED
786bd41d0a8Snordmark  * is set.
787bd41d0a8Snordmark  * If some other thread gets here first and sets *_INPROGRESS, then
788bd41d0a8Snordmark  * we wait for that thread to complete so that we can ensure that
789bd41d0a8Snordmark  * all the callbacks are done when we've looped over all netstacks/moduleids.
790f4b3ec61Sdh155122  *
791bd41d0a8Snordmark  * When we call the destroy function, we temporarily drop the netstack_lock
792bd41d0a8Snordmark  * held by the caller, and return true to tell the caller it needs to
793bd41d0a8Snordmark  * re-evalute the state.
794f4b3ec61Sdh155122  */
795bd41d0a8Snordmark static boolean_t
796bd41d0a8Snordmark netstack_apply_destroy(kmutex_t *lockp, netstack_t *ns, int moduleid)
797f4b3ec61Sdh155122 {
798bd41d0a8Snordmark 	netstackid_t stackid;
799bd41d0a8Snordmark 	void * netstack_module;
800bd41d0a8Snordmark 	nm_state_t *nms = &ns->netstack_m_state[moduleid];
801bd41d0a8Snordmark 	boolean_t dropped = B_FALSE;
802bd41d0a8Snordmark 
803bd41d0a8Snordmark 	ASSERT(MUTEX_HELD(lockp));
804bd41d0a8Snordmark 	mutex_enter(&ns->netstack_lock);
805bd41d0a8Snordmark 
806bd41d0a8Snordmark 	if (wait_for_nms_inprogress(ns, nms, lockp))
807bd41d0a8Snordmark 		dropped = B_TRUE;
808bd41d0a8Snordmark 
809bd41d0a8Snordmark 	if (nms->nms_flags & NSS_DESTROY_NEEDED) {
810bd41d0a8Snordmark 		nms->nms_flags &= ~NSS_DESTROY_NEEDED;
811bd41d0a8Snordmark 		nms->nms_flags |= NSS_DESTROY_INPROGRESS;
812bd41d0a8Snordmark 		DTRACE_PROBE2(netstack__destroy__inprogress,
813bd41d0a8Snordmark 		    netstack_t *, ns, int, moduleid);
814bd41d0a8Snordmark 		mutex_exit(&ns->netstack_lock);
815bd41d0a8Snordmark 		mutex_exit(lockp);
816bd41d0a8Snordmark 		dropped = B_TRUE;
817bd41d0a8Snordmark 
818bd41d0a8Snordmark 		ASSERT(ns_reg[moduleid].nr_destroy != NULL);
819bd41d0a8Snordmark 		stackid = ns->netstack_stackid;
820bd41d0a8Snordmark 		netstack_module = ns->netstack_modules[moduleid];
821bd41d0a8Snordmark 		DTRACE_PROBE2(netstack__destroy__start,
822bd41d0a8Snordmark 		    netstackid_t, stackid,
823bd41d0a8Snordmark 		    void *, netstack_module);
824bd41d0a8Snordmark 		(ns_reg[moduleid].nr_destroy)(stackid, netstack_module);
825bd41d0a8Snordmark 		DTRACE_PROBE1(netstack__destroy__end,
826bd41d0a8Snordmark 		    netstack_t *, ns);
827bd41d0a8Snordmark 
828bd41d0a8Snordmark 		mutex_enter(lockp);
829bd41d0a8Snordmark 		mutex_enter(&ns->netstack_lock);
830bd41d0a8Snordmark 		ns->netstack_modules[moduleid] = NULL;
831bd41d0a8Snordmark 		nms->nms_flags &= ~NSS_DESTROY_INPROGRESS;
832bd41d0a8Snordmark 		nms->nms_flags |= NSS_DESTROY_COMPLETED;
833bd41d0a8Snordmark 		cv_broadcast(&nms->nms_cv);
834bd41d0a8Snordmark 		DTRACE_PROBE2(netstack__destroy__completed,
835bd41d0a8Snordmark 		    netstack_t *, ns, int, moduleid);
836bd41d0a8Snordmark 		mutex_exit(&ns->netstack_lock);
837bd41d0a8Snordmark 		return (dropped);
838bd41d0a8Snordmark 	} else {
839bd41d0a8Snordmark 		mutex_exit(&ns->netstack_lock);
840bd41d0a8Snordmark 		return (dropped);
841bd41d0a8Snordmark 	}
842f4b3ec61Sdh155122 }
843f4b3ec61Sdh155122 
844f4b3ec61Sdh155122 /*
845bd41d0a8Snordmark  * If somebody  is creating the netstack (due to a new zone being created)
846bd41d0a8Snordmark  * then we wait for them to complete. This ensures that any additional
847bd41d0a8Snordmark  * netstack_register() doesn't cause the create functions to run out of
848bd41d0a8Snordmark  * order.
849bd41d0a8Snordmark  * Note that we do not need such a global wait in the case of the shutdown
850bd41d0a8Snordmark  * and destroy callbacks, since in that case it is sufficient for both
851bd41d0a8Snordmark  * threads to set NEEDED and wait for INPROGRESS to ensure ordering.
852bd41d0a8Snordmark  * Returns true if lockp was temporarily dropped while waiting.
853f4b3ec61Sdh155122  */
854bd41d0a8Snordmark static boolean_t
855bd41d0a8Snordmark wait_for_zone_creator(netstack_t *ns, kmutex_t *lockp)
856f4b3ec61Sdh155122 {
857bd41d0a8Snordmark 	boolean_t dropped = B_FALSE;
858bd41d0a8Snordmark 
859bd41d0a8Snordmark 	mutex_enter(&ns->netstack_lock);
860bd41d0a8Snordmark 	while (ns->netstack_flags & NSF_ZONE_CREATE) {
861bd41d0a8Snordmark 		DTRACE_PROBE1(netstack__wait__zone__inprogress,
862bd41d0a8Snordmark 		    netstack_t *, ns);
863bd41d0a8Snordmark 		if (lockp != NULL) {
864bd41d0a8Snordmark 			dropped = B_TRUE;
865bd41d0a8Snordmark 			mutex_exit(lockp);
866bd41d0a8Snordmark 		}
867bd41d0a8Snordmark 		cv_wait(&ns->netstack_cv, &ns->netstack_lock);
868bd41d0a8Snordmark 		if (lockp != NULL) {
869bd41d0a8Snordmark 			/* First drop netstack_lock to preserve order */
870bd41d0a8Snordmark 			mutex_exit(&ns->netstack_lock);
871bd41d0a8Snordmark 			mutex_enter(lockp);
872bd41d0a8Snordmark 			mutex_enter(&ns->netstack_lock);
873bd41d0a8Snordmark 		}
874bd41d0a8Snordmark 	}
875bd41d0a8Snordmark 	mutex_exit(&ns->netstack_lock);
876bd41d0a8Snordmark 	return (dropped);
877bd41d0a8Snordmark }
878bd41d0a8Snordmark 
879f4b3ec61Sdh155122 /*
880bd41d0a8Snordmark  * Wait for any INPROGRESS flag to be cleared for the netstack/moduleid
881bd41d0a8Snordmark  * combination.
882bd41d0a8Snordmark  * Returns true if lockp was temporarily dropped while waiting.
883f4b3ec61Sdh155122  */
884bd41d0a8Snordmark static boolean_t
885bd41d0a8Snordmark wait_for_nms_inprogress(netstack_t *ns, nm_state_t *nms, kmutex_t *lockp)
886bd41d0a8Snordmark {
887bd41d0a8Snordmark 	boolean_t dropped = B_FALSE;
888bd41d0a8Snordmark 
889bd41d0a8Snordmark 	while (nms->nms_flags & NSS_ALL_INPROGRESS) {
890bd41d0a8Snordmark 		DTRACE_PROBE2(netstack__wait__nms__inprogress,
891bd41d0a8Snordmark 		    netstack_t *, ns, nm_state_t *, nms);
892bd41d0a8Snordmark 		if (lockp != NULL) {
893bd41d0a8Snordmark 			dropped = B_TRUE;
894bd41d0a8Snordmark 			mutex_exit(lockp);
895bd41d0a8Snordmark 		}
896bd41d0a8Snordmark 		cv_wait(&nms->nms_cv, &ns->netstack_lock);
897bd41d0a8Snordmark 		if (lockp != NULL) {
898bd41d0a8Snordmark 			/* First drop netstack_lock to preserve order */
899bd41d0a8Snordmark 			mutex_exit(&ns->netstack_lock);
900bd41d0a8Snordmark 			mutex_enter(lockp);
901bd41d0a8Snordmark 			mutex_enter(&ns->netstack_lock);
902bd41d0a8Snordmark 		}
903bd41d0a8Snordmark 	}
904bd41d0a8Snordmark 	return (dropped);
905f4b3ec61Sdh155122 }
906f4b3ec61Sdh155122 
907f4b3ec61Sdh155122 /*
908f4b3ec61Sdh155122  * Get the stack instance used in caller's zone.
909f4b3ec61Sdh155122  * Increases the reference count, caller must do a netstack_rele.
910f4b3ec61Sdh155122  * It can't be called after zone_destroy() has started.
911f4b3ec61Sdh155122  */
912fd006805Snordmark netstack_t *
913f4b3ec61Sdh155122 netstack_get_current(void)
914f4b3ec61Sdh155122 {
915f4b3ec61Sdh155122 	netstack_t *ns;
916f4b3ec61Sdh155122 
917f4b3ec61Sdh155122 	ns = curproc->p_zone->zone_netstack;
918f4b3ec61Sdh155122 	ASSERT(ns != NULL);
919f4b3ec61Sdh155122 	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
920f4b3ec61Sdh155122 		return (NULL);
921f4b3ec61Sdh155122 
922f4b3ec61Sdh155122 	netstack_hold(ns);
923f4b3ec61Sdh155122 
924f4b3ec61Sdh155122 	return (ns);
925f4b3ec61Sdh155122 }
926f4b3ec61Sdh155122 
927f4b3ec61Sdh155122 /*
928f4b3ec61Sdh155122  * Find a stack instance given the cred.
929f4b3ec61Sdh155122  * This is used by the modules to potentially allow for a future when
930f4b3ec61Sdh155122  * something other than the zoneid is used to determine the stack.
931f4b3ec61Sdh155122  */
932f4b3ec61Sdh155122 netstack_t *
933f4b3ec61Sdh155122 netstack_find_by_cred(const cred_t *cr)
934f4b3ec61Sdh155122 {
935f4b3ec61Sdh155122 	zoneid_t zoneid = crgetzoneid(cr);
936f4b3ec61Sdh155122 
937f4b3ec61Sdh155122 	/* Handle the case when cr_zone is NULL */
938f4b3ec61Sdh155122 	if (zoneid == (zoneid_t)-1)
939f4b3ec61Sdh155122 		zoneid = GLOBAL_ZONEID;
940f4b3ec61Sdh155122 
941f4b3ec61Sdh155122 	/* For performance ... */
942f4b3ec61Sdh155122 	if (curproc->p_zone->zone_id == zoneid)
943f4b3ec61Sdh155122 		return (netstack_get_current());
944f4b3ec61Sdh155122 	else
945f4b3ec61Sdh155122 		return (netstack_find_by_zoneid(zoneid));
946f4b3ec61Sdh155122 }
947f4b3ec61Sdh155122 
948f4b3ec61Sdh155122 /*
949f4b3ec61Sdh155122  * Find a stack instance given the zoneid.
950f4b3ec61Sdh155122  * Increases the reference count if found; caller must do a
951f4b3ec61Sdh155122  * netstack_rele().
952f4b3ec61Sdh155122  *
953f4b3ec61Sdh155122  * If there is no exact match then assume the shared stack instance
954f4b3ec61Sdh155122  * matches.
955f4b3ec61Sdh155122  *
956f4b3ec61Sdh155122  * Skip the unitialized ones.
957f4b3ec61Sdh155122  */
958f4b3ec61Sdh155122 netstack_t *
959f4b3ec61Sdh155122 netstack_find_by_zoneid(zoneid_t zoneid)
960f4b3ec61Sdh155122 {
961f4b3ec61Sdh155122 	netstack_t *ns;
962f4b3ec61Sdh155122 	zone_t *zone;
963f4b3ec61Sdh155122 
964f4b3ec61Sdh155122 	zone = zone_find_by_id(zoneid);
965f4b3ec61Sdh155122 
966f4b3ec61Sdh155122 	if (zone == NULL)
967f4b3ec61Sdh155122 		return (NULL);
968f4b3ec61Sdh155122 
969f4b3ec61Sdh155122 	ns = zone->zone_netstack;
970f4b3ec61Sdh155122 	ASSERT(ns != NULL);
971f4b3ec61Sdh155122 	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
972f4b3ec61Sdh155122 		ns = NULL;
973f4b3ec61Sdh155122 	else
974f4b3ec61Sdh155122 		netstack_hold(ns);
975f4b3ec61Sdh155122 
976f4b3ec61Sdh155122 	zone_rele(zone);
977f4b3ec61Sdh155122 	return (ns);
978f4b3ec61Sdh155122 }
979f4b3ec61Sdh155122 
980f4b3ec61Sdh155122 /*
981bd41d0a8Snordmark  * Find a stack instance given the zoneid. Can only be called from
982bd41d0a8Snordmark  * the create callback. See the comments in zone_find_by_id_nolock why
983bd41d0a8Snordmark  * that limitation exists.
984bd41d0a8Snordmark  *
985f4b3ec61Sdh155122  * Increases the reference count if found; caller must do a
986f4b3ec61Sdh155122  * netstack_rele().
987f4b3ec61Sdh155122  *
988f4b3ec61Sdh155122  * If there is no exact match then assume the shared stack instance
989f4b3ec61Sdh155122  * matches.
990f4b3ec61Sdh155122  *
991f4b3ec61Sdh155122  * Skip the unitialized ones.
992f4b3ec61Sdh155122  */
993f4b3ec61Sdh155122 netstack_t *
994f4b3ec61Sdh155122 netstack_find_by_zoneid_nolock(zoneid_t zoneid)
995f4b3ec61Sdh155122 {
996f4b3ec61Sdh155122 	netstack_t *ns;
997f4b3ec61Sdh155122 	zone_t *zone;
998f4b3ec61Sdh155122 
999f4b3ec61Sdh155122 	zone = zone_find_by_id_nolock(zoneid);
1000f4b3ec61Sdh155122 
1001f4b3ec61Sdh155122 	if (zone == NULL)
1002f4b3ec61Sdh155122 		return (NULL);
1003f4b3ec61Sdh155122 
1004f4b3ec61Sdh155122 	ns = zone->zone_netstack;
1005f4b3ec61Sdh155122 	ASSERT(ns != NULL);
1006f4b3ec61Sdh155122 
1007f4b3ec61Sdh155122 	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
1008f4b3ec61Sdh155122 		ns = NULL;
1009f4b3ec61Sdh155122 	else
1010f4b3ec61Sdh155122 		netstack_hold(ns);
1011f4b3ec61Sdh155122 
1012bd41d0a8Snordmark 	/* zone_find_by_id_nolock does not have a hold on the zone */
1013f4b3ec61Sdh155122 	return (ns);
1014f4b3ec61Sdh155122 }
1015f4b3ec61Sdh155122 
1016f4b3ec61Sdh155122 /*
1017f4b3ec61Sdh155122  * Find a stack instance given the stackid with exact match?
1018f4b3ec61Sdh155122  * Increases the reference count if found; caller must do a
1019f4b3ec61Sdh155122  * netstack_rele().
1020f4b3ec61Sdh155122  *
1021f4b3ec61Sdh155122  * Skip the unitialized ones.
1022f4b3ec61Sdh155122  */
1023f4b3ec61Sdh155122 netstack_t *
1024f4b3ec61Sdh155122 netstack_find_by_stackid(netstackid_t stackid)
1025f4b3ec61Sdh155122 {
1026f4b3ec61Sdh155122 	netstack_t *ns;
1027f4b3ec61Sdh155122 
1028f4b3ec61Sdh155122 	mutex_enter(&netstack_g_lock);
1029f4b3ec61Sdh155122 	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
1030f4b3ec61Sdh155122 		mutex_enter(&ns->netstack_lock);
1031f4b3ec61Sdh155122 		if (ns->netstack_stackid == stackid &&
1032f4b3ec61Sdh155122 		    !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
1033f4b3ec61Sdh155122 			mutex_exit(&ns->netstack_lock);
1034f4b3ec61Sdh155122 			netstack_hold(ns);
1035f4b3ec61Sdh155122 			mutex_exit(&netstack_g_lock);
1036f4b3ec61Sdh155122 			return (ns);
1037f4b3ec61Sdh155122 		}
1038f4b3ec61Sdh155122 		mutex_exit(&ns->netstack_lock);
1039f4b3ec61Sdh155122 	}
1040f4b3ec61Sdh155122 	mutex_exit(&netstack_g_lock);
1041f4b3ec61Sdh155122 	return (NULL);
1042f4b3ec61Sdh155122 }
1043f4b3ec61Sdh155122 
1044f4b3ec61Sdh155122 void
1045f4b3ec61Sdh155122 netstack_rele(netstack_t *ns)
1046f4b3ec61Sdh155122 {
1047f4b3ec61Sdh155122 	netstack_t **nsp;
1048f4b3ec61Sdh155122 	boolean_t found;
1049f4b3ec61Sdh155122 	int refcnt, numzones;
1050bd41d0a8Snordmark 	int i;
1051f4b3ec61Sdh155122 
1052f4b3ec61Sdh155122 	mutex_enter(&ns->netstack_lock);
1053f4b3ec61Sdh155122 	ASSERT(ns->netstack_refcnt > 0);
1054f4b3ec61Sdh155122 	ns->netstack_refcnt--;
1055f4b3ec61Sdh155122 	/*
1056f4b3ec61Sdh155122 	 * As we drop the lock additional netstack_rele()s can come in
1057f4b3ec61Sdh155122 	 * and decrement the refcnt to zero and free the netstack_t.
1058f4b3ec61Sdh155122 	 * Store pointers in local variables and if we were not the last
1059f4b3ec61Sdh155122 	 * then don't reference the netstack_t after that.
1060f4b3ec61Sdh155122 	 */
1061f4b3ec61Sdh155122 	refcnt = ns->netstack_refcnt;
1062f4b3ec61Sdh155122 	numzones = ns->netstack_numzones;
1063f4b3ec61Sdh155122 	DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
1064f4b3ec61Sdh155122 	mutex_exit(&ns->netstack_lock);
1065f4b3ec61Sdh155122 
1066f4b3ec61Sdh155122 	if (refcnt == 0 && numzones == 0) {
1067f4b3ec61Sdh155122 		/*
1068f4b3ec61Sdh155122 		 * Time to call the destroy functions and free up
1069f4b3ec61Sdh155122 		 * the structure
1070f4b3ec61Sdh155122 		 */
1071f4b3ec61Sdh155122 		netstack_stack_inactive(ns);
1072f4b3ec61Sdh155122 
107323f4867fSnordmark 		/* Make sure nothing increased the references */
107423f4867fSnordmark 		ASSERT(ns->netstack_refcnt == 0);
107523f4867fSnordmark 		ASSERT(ns->netstack_numzones == 0);
107623f4867fSnordmark 
1077f4b3ec61Sdh155122 		/* Finally remove from list of netstacks */
1078f4b3ec61Sdh155122 		mutex_enter(&netstack_g_lock);
1079f4b3ec61Sdh155122 		found = B_FALSE;
1080f4b3ec61Sdh155122 		for (nsp = &netstack_head; *nsp != NULL;
1081f4b3ec61Sdh155122 		    nsp = &(*nsp)->netstack_next) {
1082f4b3ec61Sdh155122 			if (*nsp == ns) {
1083f4b3ec61Sdh155122 				*nsp = ns->netstack_next;
1084f4b3ec61Sdh155122 				ns->netstack_next = NULL;
1085f4b3ec61Sdh155122 				found = B_TRUE;
1086f4b3ec61Sdh155122 				break;
1087f4b3ec61Sdh155122 			}
1088f4b3ec61Sdh155122 		}
1089f4b3ec61Sdh155122 		ASSERT(found);
1090f4b3ec61Sdh155122 		mutex_exit(&netstack_g_lock);
1091f4b3ec61Sdh155122 
109223f4867fSnordmark 		/* Make sure nothing increased the references */
109323f4867fSnordmark 		ASSERT(ns->netstack_refcnt == 0);
109423f4867fSnordmark 		ASSERT(ns->netstack_numzones == 0);
109523f4867fSnordmark 
1096f4b3ec61Sdh155122 		ASSERT(ns->netstack_flags & NSF_CLOSING);
1097bd41d0a8Snordmark 
1098bd41d0a8Snordmark 		for (i = 0; i < NS_MAX; i++) {
1099bd41d0a8Snordmark 			nm_state_t *nms = &ns->netstack_m_state[i];
1100bd41d0a8Snordmark 
1101bd41d0a8Snordmark 			cv_destroy(&nms->nms_cv);
1102bd41d0a8Snordmark 		}
1103bd41d0a8Snordmark 		mutex_destroy(&ns->netstack_lock);
1104bd41d0a8Snordmark 		cv_destroy(&ns->netstack_cv);
1105f4b3ec61Sdh155122 		kmem_free(ns, sizeof (*ns));
1106f4b3ec61Sdh155122 	}
1107f4b3ec61Sdh155122 }
1108f4b3ec61Sdh155122 
1109f4b3ec61Sdh155122 void
1110f4b3ec61Sdh155122 netstack_hold(netstack_t *ns)
1111f4b3ec61Sdh155122 {
1112f4b3ec61Sdh155122 	mutex_enter(&ns->netstack_lock);
1113f4b3ec61Sdh155122 	ns->netstack_refcnt++;
1114f4b3ec61Sdh155122 	ASSERT(ns->netstack_refcnt > 0);
1115f4b3ec61Sdh155122 	mutex_exit(&ns->netstack_lock);
1116f4b3ec61Sdh155122 	DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
1117f4b3ec61Sdh155122 }
1118f4b3ec61Sdh155122 
1119f4b3ec61Sdh155122 /*
1120f4b3ec61Sdh155122  * To support kstat_create_netstack() using kstat_zone_add we need
1121f4b3ec61Sdh155122  * to track both
1122f4b3ec61Sdh155122  *  - all zoneids that use the global/shared stack
1123f4b3ec61Sdh155122  *  - all kstats that have been added for the shared stack
1124f4b3ec61Sdh155122  */
1125f4b3ec61Sdh155122 kstat_t *
1126f4b3ec61Sdh155122 kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
1127f4b3ec61Sdh155122     char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
1128f4b3ec61Sdh155122     netstackid_t ks_netstackid)
1129f4b3ec61Sdh155122 {
1130f4b3ec61Sdh155122 	kstat_t *ks;
1131f4b3ec61Sdh155122 
1132f4b3ec61Sdh155122 	if (ks_netstackid == GLOBAL_NETSTACKID) {
1133f4b3ec61Sdh155122 		ks = kstat_create_zone(ks_module, ks_instance, ks_name,
1134f4b3ec61Sdh155122 		    ks_class, ks_type, ks_ndata, ks_flags, GLOBAL_ZONEID);
1135f4b3ec61Sdh155122 		if (ks != NULL)
1136f4b3ec61Sdh155122 			netstack_shared_kstat_add(ks);
1137f4b3ec61Sdh155122 		return (ks);
1138f4b3ec61Sdh155122 	} else {
1139f4b3ec61Sdh155122 		zoneid_t zoneid = ks_netstackid;
1140f4b3ec61Sdh155122 
1141f4b3ec61Sdh155122 		return (kstat_create_zone(ks_module, ks_instance, ks_name,
1142f4b3ec61Sdh155122 		    ks_class, ks_type, ks_ndata, ks_flags, zoneid));
1143f4b3ec61Sdh155122 	}
1144f4b3ec61Sdh155122 }
1145f4b3ec61Sdh155122 
1146f4b3ec61Sdh155122 void
1147f4b3ec61Sdh155122 kstat_delete_netstack(kstat_t *ks, netstackid_t ks_netstackid)
1148f4b3ec61Sdh155122 {
1149f4b3ec61Sdh155122 	if (ks_netstackid == GLOBAL_NETSTACKID) {
1150f4b3ec61Sdh155122 		netstack_shared_kstat_remove(ks);
1151f4b3ec61Sdh155122 	}
1152f4b3ec61Sdh155122 	kstat_delete(ks);
1153f4b3ec61Sdh155122 }
1154f4b3ec61Sdh155122 
1155f4b3ec61Sdh155122 static void
1156f4b3ec61Sdh155122 netstack_shared_zone_add(zoneid_t zoneid)
1157f4b3ec61Sdh155122 {
1158f4b3ec61Sdh155122 	struct shared_zone_list *sz;
1159f4b3ec61Sdh155122 	struct shared_kstat_list *sk;
1160f4b3ec61Sdh155122 
1161f4b3ec61Sdh155122 	sz = (struct shared_zone_list *)kmem_zalloc(sizeof (*sz), KM_SLEEP);
1162f4b3ec61Sdh155122 	sz->sz_zoneid = zoneid;
1163f4b3ec61Sdh155122 
1164f4b3ec61Sdh155122 	/* Insert in list */
1165f4b3ec61Sdh155122 	mutex_enter(&netstack_shared_lock);
1166f4b3ec61Sdh155122 	sz->sz_next = netstack_shared_zones;
1167f4b3ec61Sdh155122 	netstack_shared_zones = sz;
1168f4b3ec61Sdh155122 
1169f4b3ec61Sdh155122 	/*
1170f4b3ec61Sdh155122 	 * Perform kstat_zone_add for each existing shared stack kstat.
1171f4b3ec61Sdh155122 	 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
1172f4b3ec61Sdh155122 	 */
1173f4b3ec61Sdh155122 	for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
1174f4b3ec61Sdh155122 		kstat_zone_add(sk->sk_kstat, zoneid);
1175f4b3ec61Sdh155122 	}
1176f4b3ec61Sdh155122 	mutex_exit(&netstack_shared_lock);
1177f4b3ec61Sdh155122 }
1178f4b3ec61Sdh155122 
1179f4b3ec61Sdh155122 static void
1180f4b3ec61Sdh155122 netstack_shared_zone_remove(zoneid_t zoneid)
1181f4b3ec61Sdh155122 {
1182f4b3ec61Sdh155122 	struct shared_zone_list **szp, *sz;
1183f4b3ec61Sdh155122 	struct shared_kstat_list *sk;
1184f4b3ec61Sdh155122 
1185f4b3ec61Sdh155122 	/* Find in list */
1186f4b3ec61Sdh155122 	mutex_enter(&netstack_shared_lock);
1187f4b3ec61Sdh155122 	sz = NULL;
1188f4b3ec61Sdh155122 	for (szp = &netstack_shared_zones; *szp != NULL;
1189f4b3ec61Sdh155122 	    szp = &((*szp)->sz_next)) {
1190f4b3ec61Sdh155122 		if ((*szp)->sz_zoneid == zoneid) {
1191f4b3ec61Sdh155122 			sz = *szp;
1192f4b3ec61Sdh155122 			break;
1193f4b3ec61Sdh155122 		}
1194f4b3ec61Sdh155122 	}
1195f4b3ec61Sdh155122 	/* We must find it */
1196f4b3ec61Sdh155122 	ASSERT(sz != NULL);
1197f4b3ec61Sdh155122 	*szp = sz->sz_next;
1198f4b3ec61Sdh155122 	sz->sz_next = NULL;
1199f4b3ec61Sdh155122 
1200f4b3ec61Sdh155122 	/*
1201f4b3ec61Sdh155122 	 * Perform kstat_zone_remove for each existing shared stack kstat.
1202f4b3ec61Sdh155122 	 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
1203f4b3ec61Sdh155122 	 */
1204f4b3ec61Sdh155122 	for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
1205f4b3ec61Sdh155122 		kstat_zone_remove(sk->sk_kstat, zoneid);
1206f4b3ec61Sdh155122 	}
1207f4b3ec61Sdh155122 	mutex_exit(&netstack_shared_lock);
1208f4b3ec61Sdh155122 
1209f4b3ec61Sdh155122 	kmem_free(sz, sizeof (*sz));
1210f4b3ec61Sdh155122 }
1211f4b3ec61Sdh155122 
1212f4b3ec61Sdh155122 static void
1213f4b3ec61Sdh155122 netstack_shared_kstat_add(kstat_t *ks)
1214f4b3ec61Sdh155122 {
1215f4b3ec61Sdh155122 	struct shared_zone_list *sz;
1216f4b3ec61Sdh155122 	struct shared_kstat_list *sk;
1217f4b3ec61Sdh155122 
1218f4b3ec61Sdh155122 	sk = (struct shared_kstat_list *)kmem_zalloc(sizeof (*sk), KM_SLEEP);
1219f4b3ec61Sdh155122 	sk->sk_kstat = ks;
1220f4b3ec61Sdh155122 
1221f4b3ec61Sdh155122 	/* Insert in list */
1222f4b3ec61Sdh155122 	mutex_enter(&netstack_shared_lock);
1223f4b3ec61Sdh155122 	sk->sk_next = netstack_shared_kstats;
1224f4b3ec61Sdh155122 	netstack_shared_kstats = sk;
1225f4b3ec61Sdh155122 
1226f4b3ec61Sdh155122 	/*
1227f4b3ec61Sdh155122 	 * Perform kstat_zone_add for each existing shared stack zone.
1228f4b3ec61Sdh155122 	 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
1229f4b3ec61Sdh155122 	 */
1230f4b3ec61Sdh155122 	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1231f4b3ec61Sdh155122 		kstat_zone_add(ks, sz->sz_zoneid);
1232f4b3ec61Sdh155122 	}
1233f4b3ec61Sdh155122 	mutex_exit(&netstack_shared_lock);
1234f4b3ec61Sdh155122 }
1235f4b3ec61Sdh155122 
1236f4b3ec61Sdh155122 static void
1237f4b3ec61Sdh155122 netstack_shared_kstat_remove(kstat_t *ks)
1238f4b3ec61Sdh155122 {
1239f4b3ec61Sdh155122 	struct shared_zone_list *sz;
1240f4b3ec61Sdh155122 	struct shared_kstat_list **skp, *sk;
1241f4b3ec61Sdh155122 
1242f4b3ec61Sdh155122 	/* Find in list */
1243f4b3ec61Sdh155122 	mutex_enter(&netstack_shared_lock);
1244f4b3ec61Sdh155122 	sk = NULL;
1245f4b3ec61Sdh155122 	for (skp = &netstack_shared_kstats; *skp != NULL;
1246f4b3ec61Sdh155122 	    skp = &((*skp)->sk_next)) {
1247f4b3ec61Sdh155122 		if ((*skp)->sk_kstat == ks) {
1248f4b3ec61Sdh155122 			sk = *skp;
1249f4b3ec61Sdh155122 			break;
1250f4b3ec61Sdh155122 		}
1251f4b3ec61Sdh155122 	}
1252f4b3ec61Sdh155122 	/* Must find it */
1253f4b3ec61Sdh155122 	ASSERT(sk != NULL);
1254f4b3ec61Sdh155122 	*skp = sk->sk_next;
1255f4b3ec61Sdh155122 	sk->sk_next = NULL;
1256f4b3ec61Sdh155122 
1257f4b3ec61Sdh155122 	/*
1258f4b3ec61Sdh155122 	 * Perform kstat_zone_remove for each existing shared stack kstat.
1259f4b3ec61Sdh155122 	 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
1260f4b3ec61Sdh155122 	 */
1261f4b3ec61Sdh155122 	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1262f4b3ec61Sdh155122 		kstat_zone_remove(ks, sz->sz_zoneid);
1263f4b3ec61Sdh155122 	}
1264f4b3ec61Sdh155122 	mutex_exit(&netstack_shared_lock);
1265f4b3ec61Sdh155122 	kmem_free(sk, sizeof (*sk));
1266f4b3ec61Sdh155122 }
1267f4b3ec61Sdh155122 
1268f4b3ec61Sdh155122 /*
1269f4b3ec61Sdh155122  * If a zoneid is part of the shared zone, return true
1270f4b3ec61Sdh155122  */
1271f4b3ec61Sdh155122 static boolean_t
1272f4b3ec61Sdh155122 netstack_find_shared_zoneid(zoneid_t zoneid)
1273f4b3ec61Sdh155122 {
1274f4b3ec61Sdh155122 	struct shared_zone_list *sz;
1275f4b3ec61Sdh155122 
1276f4b3ec61Sdh155122 	mutex_enter(&netstack_shared_lock);
1277f4b3ec61Sdh155122 	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1278f4b3ec61Sdh155122 		if (sz->sz_zoneid == zoneid) {
1279f4b3ec61Sdh155122 			mutex_exit(&netstack_shared_lock);
1280f4b3ec61Sdh155122 			return (B_TRUE);
1281f4b3ec61Sdh155122 		}
1282f4b3ec61Sdh155122 	}
1283f4b3ec61Sdh155122 	mutex_exit(&netstack_shared_lock);
1284f4b3ec61Sdh155122 	return (B_FALSE);
1285f4b3ec61Sdh155122 }
1286f4b3ec61Sdh155122 
1287f4b3ec61Sdh155122 /*
1288f4b3ec61Sdh155122  * Hide the fact that zoneids and netstackids are allocated from
1289f4b3ec61Sdh155122  * the same space in the current implementation.
1290bd41d0a8Snordmark  * We currently do not check that the stackid/zoneids are valid, since there
1291bd41d0a8Snordmark  * is no need for that. But this should only be done for ids that are
1292bd41d0a8Snordmark  * valid.
1293f4b3ec61Sdh155122  */
1294f4b3ec61Sdh155122 zoneid_t
1295f4b3ec61Sdh155122 netstackid_to_zoneid(netstackid_t stackid)
1296f4b3ec61Sdh155122 {
1297f4b3ec61Sdh155122 	return (stackid);
1298f4b3ec61Sdh155122 }
1299f4b3ec61Sdh155122 
1300f4b3ec61Sdh155122 netstackid_t
1301f4b3ec61Sdh155122 zoneid_to_netstackid(zoneid_t zoneid)
1302f4b3ec61Sdh155122 {
1303f4b3ec61Sdh155122 	if (netstack_find_shared_zoneid(zoneid))
1304f4b3ec61Sdh155122 		return (GLOBAL_ZONEID);
1305f4b3ec61Sdh155122 	else
1306f4b3ec61Sdh155122 		return (zoneid);
1307f4b3ec61Sdh155122 }
1308f4b3ec61Sdh155122 
13090a0e9771SDarren Reed zoneid_t
13100a0e9771SDarren Reed netstack_get_zoneid(netstack_t *ns)
13110a0e9771SDarren Reed {
13120a0e9771SDarren Reed 	return (netstackid_to_zoneid(ns->netstack_stackid));
13130a0e9771SDarren Reed }
13140a0e9771SDarren Reed 
1315f4b3ec61Sdh155122 /*
1316f4b3ec61Sdh155122  * Simplistic support for walking all the handles.
1317f4b3ec61Sdh155122  * Example usage:
1318f4b3ec61Sdh155122  *	netstack_handle_t nh;
1319f4b3ec61Sdh155122  *	netstack_t *ns;
1320f4b3ec61Sdh155122  *
1321f4b3ec61Sdh155122  *	netstack_next_init(&nh);
1322f4b3ec61Sdh155122  *	while ((ns = netstack_next(&nh)) != NULL) {
1323f4b3ec61Sdh155122  *		do something;
1324f4b3ec61Sdh155122  *		netstack_rele(ns);
1325f4b3ec61Sdh155122  *	}
1326f4b3ec61Sdh155122  *	netstack_next_fini(&nh);
1327f4b3ec61Sdh155122  */
1328f4b3ec61Sdh155122 void
1329f4b3ec61Sdh155122 netstack_next_init(netstack_handle_t *handle)
1330f4b3ec61Sdh155122 {
1331f4b3ec61Sdh155122 	*handle = 0;
1332f4b3ec61Sdh155122 }
1333f4b3ec61Sdh155122 
1334f4b3ec61Sdh155122 /* ARGSUSED */
1335f4b3ec61Sdh155122 void
1336f4b3ec61Sdh155122 netstack_next_fini(netstack_handle_t *handle)
1337f4b3ec61Sdh155122 {
1338f4b3ec61Sdh155122 }
1339f4b3ec61Sdh155122 
1340f4b3ec61Sdh155122 netstack_t *
1341f4b3ec61Sdh155122 netstack_next(netstack_handle_t *handle)
1342f4b3ec61Sdh155122 {
1343f4b3ec61Sdh155122 	netstack_t *ns;
1344f4b3ec61Sdh155122 	int i, end;
1345f4b3ec61Sdh155122 
1346f4b3ec61Sdh155122 	end = *handle;
1347f4b3ec61Sdh155122 	/* Walk skipping *handle number of instances */
1348f4b3ec61Sdh155122 
1349f4b3ec61Sdh155122 	/* Look if there is a matching stack instance */
1350f4b3ec61Sdh155122 	mutex_enter(&netstack_g_lock);
1351f4b3ec61Sdh155122 	ns = netstack_head;
1352f4b3ec61Sdh155122 	for (i = 0; i < end; i++) {
1353f4b3ec61Sdh155122 		if (ns == NULL)
1354f4b3ec61Sdh155122 			break;
1355f4b3ec61Sdh155122 		ns = ns->netstack_next;
1356f4b3ec61Sdh155122 	}
1357f4b3ec61Sdh155122 	/* skip those with that aren't really here */
1358f4b3ec61Sdh155122 	while (ns != NULL) {
1359f4b3ec61Sdh155122 		mutex_enter(&ns->netstack_lock);
1360f4b3ec61Sdh155122 		if ((ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) == 0) {
1361f4b3ec61Sdh155122 			mutex_exit(&ns->netstack_lock);
1362f4b3ec61Sdh155122 			break;
1363f4b3ec61Sdh155122 		}
1364f4b3ec61Sdh155122 		mutex_exit(&ns->netstack_lock);
1365f4b3ec61Sdh155122 		end++;
1366f4b3ec61Sdh155122 		ns = ns->netstack_next;
1367f4b3ec61Sdh155122 	}
1368f4b3ec61Sdh155122 	if (ns != NULL) {
1369f4b3ec61Sdh155122 		*handle = end + 1;
1370f4b3ec61Sdh155122 		netstack_hold(ns);
1371f4b3ec61Sdh155122 	}
1372f4b3ec61Sdh155122 	mutex_exit(&netstack_g_lock);
1373f4b3ec61Sdh155122 	return (ns);
1374f4b3ec61Sdh155122 }
1375