xref: /titanic_53/usr/src/uts/common/os/netstack.c (revision f4b3ec61df05330d25f55a36b975b4d7519fdeb1)
1*f4b3ec61Sdh155122 /*
2*f4b3ec61Sdh155122  * CDDL HEADER START
3*f4b3ec61Sdh155122  *
4*f4b3ec61Sdh155122  * The contents of this file are subject to the terms of the
5*f4b3ec61Sdh155122  * Common Development and Distribution License (the "License").
6*f4b3ec61Sdh155122  * You may not use this file except in compliance with the License.
7*f4b3ec61Sdh155122  *
8*f4b3ec61Sdh155122  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*f4b3ec61Sdh155122  * or http://www.opensolaris.org/os/licensing.
10*f4b3ec61Sdh155122  * See the License for the specific language governing permissions
11*f4b3ec61Sdh155122  * and limitations under the License.
12*f4b3ec61Sdh155122  *
13*f4b3ec61Sdh155122  * When distributing Covered Code, include this CDDL HEADER in each
14*f4b3ec61Sdh155122  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*f4b3ec61Sdh155122  * If applicable, add the following below this CDDL HEADER, with the
16*f4b3ec61Sdh155122  * fields enclosed by brackets "[]" replaced with your own identifying
17*f4b3ec61Sdh155122  * information: Portions Copyright [yyyy] [name of copyright owner]
18*f4b3ec61Sdh155122  *
19*f4b3ec61Sdh155122  * CDDL HEADER END
20*f4b3ec61Sdh155122  */
21*f4b3ec61Sdh155122 
22*f4b3ec61Sdh155122 /*
23*f4b3ec61Sdh155122  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24*f4b3ec61Sdh155122  * Use is subject to license terms.
25*f4b3ec61Sdh155122  */
26*f4b3ec61Sdh155122 
27*f4b3ec61Sdh155122 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*f4b3ec61Sdh155122 
29*f4b3ec61Sdh155122 #include <sys/param.h>
30*f4b3ec61Sdh155122 #include <sys/sysmacros.h>
31*f4b3ec61Sdh155122 #include <sys/vm.h>
32*f4b3ec61Sdh155122 #include <sys/proc.h>
33*f4b3ec61Sdh155122 #include <sys/tuneable.h>
34*f4b3ec61Sdh155122 #include <sys/systm.h>
35*f4b3ec61Sdh155122 #include <sys/cmn_err.h>
36*f4b3ec61Sdh155122 #include <sys/debug.h>
37*f4b3ec61Sdh155122 #include <sys/sdt.h>
38*f4b3ec61Sdh155122 #include <sys/mutex.h>
39*f4b3ec61Sdh155122 #include <sys/bitmap.h>
40*f4b3ec61Sdh155122 #include <sys/atomic.h>
41*f4b3ec61Sdh155122 #include <sys/kobj.h>
42*f4b3ec61Sdh155122 #include <sys/disp.h>
43*f4b3ec61Sdh155122 #include <vm/seg_kmem.h>
44*f4b3ec61Sdh155122 #include <sys/zone.h>
45*f4b3ec61Sdh155122 #include <sys/netstack.h>
46*f4b3ec61Sdh155122 
/*
 * What we use so that the zones framework can tell us about new zones,
 * which we use to create new stacks.
 */
static zone_key_t netstack_zone_key;

/* Nonzero once netstack_init() has run; asserted by the entry points. */
static int	netstack_initialized = 0;

/*
 * Track the registered netstacks.
 * The global lock protects
 * - ns_reg
 * - the list starting at netstack_head and following the netstack_next
 *   pointers.
 */
static kmutex_t netstack_g_lock;

/*
 * Registry of netstacks with their create/shutdown/destroy functions.
 * Indexed by moduleid; protected by netstack_g_lock.
 */
static struct netstack_registry	ns_reg[NS_MAX];

/*
 * Global list of existing stacks.  We use this when a new zone with
 * an exclusive IP instance is created.
 *
 * Note that in some cases a netstack_t needs to stay around after the zone
 * has gone away. This is because there might be outstanding references
 * (from TCP TIME_WAIT connections, IPsec state, etc). The netstack_t data
 * structure and all the foo_stack_t's hanging off of it will be cleaned up
 * when the last reference to it is dropped.
 * However, the same zone might be rebooted. That is handled using the
 * assumption that the zones framework picks a new zoneid each time a zone
 * is (re)booted. We assert for that condition in netstack_zone_create().
 * Thus the old netstack_t can take its time for things to time out.
 */
static netstack_t *netstack_head;

/*
 * To support kstat_create_netstack() using kstat_zone_add we need
 * to track both
 *  - all zoneids that use the global/shared stack
 *  - all kstats that have been added for the shared stack
 */

/* One entry per zone currently sharing the global stack. */
struct shared_zone_list {
	struct shared_zone_list *sz_next;
	zoneid_t		sz_zoneid;
};

/* One entry per kstat created against the shared stack. */
struct shared_kstat_list {
	struct shared_kstat_list *sk_next;
	kstat_t			 *sk_kstat;
};

static kmutex_t netstack_shared_lock;	/* protects the following two */
static struct shared_zone_list	*netstack_shared_zones;
static struct shared_kstat_list	*netstack_shared_kstats;

/* Zone lifecycle callbacks registered via zone_key_create() */
static void	*netstack_zone_create(zoneid_t zoneid);
static void	netstack_zone_shutdown(zoneid_t zoneid, void *arg);
static void	netstack_zone_destroy(zoneid_t zoneid, void *arg);

/* Walk all stacks applying any pending create/shutdown/destroy work */
static void	netstack_do_create(void);
static void	netstack_do_shutdown(void);
static void	netstack_do_destroy(void);

/* Bookkeeping for zones and kstats that use the global/shared stack */
static void	netstack_shared_zone_add(zoneid_t zoneid);
static void	netstack_shared_zone_remove(zoneid_t zoneid);
static void	netstack_shared_kstat_add(kstat_t *ks);
static void	netstack_shared_kstat_remove(kstat_t *ks);
118*f4b3ec61Sdh155122 
119*f4b3ec61Sdh155122 void
120*f4b3ec61Sdh155122 netstack_init(void)
121*f4b3ec61Sdh155122 {
122*f4b3ec61Sdh155122 	mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
123*f4b3ec61Sdh155122 	mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);
124*f4b3ec61Sdh155122 
125*f4b3ec61Sdh155122 	netstack_initialized = 1;
126*f4b3ec61Sdh155122 
127*f4b3ec61Sdh155122 	/*
128*f4b3ec61Sdh155122 	 * We want to be informed each time a zone is created or
129*f4b3ec61Sdh155122 	 * destroyed in the kernel, so we can maintain the
130*f4b3ec61Sdh155122 	 * stack instance information.
131*f4b3ec61Sdh155122 	 */
132*f4b3ec61Sdh155122 	zone_key_create(&netstack_zone_key, netstack_zone_create,
133*f4b3ec61Sdh155122 	    netstack_zone_shutdown, netstack_zone_destroy);
134*f4b3ec61Sdh155122 }
135*f4b3ec61Sdh155122 
136*f4b3ec61Sdh155122 /*
137*f4b3ec61Sdh155122  * Register a new module with the framework.
138*f4b3ec61Sdh155122  * This registers interest in changes to the set of netstacks.
139*f4b3ec61Sdh155122  * The createfn and destroyfn are required, but the shutdownfn can be
140*f4b3ec61Sdh155122  * NULL.
141*f4b3ec61Sdh155122  * Note that due to the current zsd implementation, when the create
142*f4b3ec61Sdh155122  * function is called the zone isn't fully present, thus functions
143*f4b3ec61Sdh155122  * like zone_find_by_* will fail, hence the create function can not
144*f4b3ec61Sdh155122  * use many zones kernel functions including zcmn_err().
145*f4b3ec61Sdh155122  */
146*f4b3ec61Sdh155122 void
147*f4b3ec61Sdh155122 netstack_register(int moduleid,
148*f4b3ec61Sdh155122     void *(*module_create)(netstackid_t, netstack_t *),
149*f4b3ec61Sdh155122     void (*module_shutdown)(netstackid_t, void *),
150*f4b3ec61Sdh155122     void (*module_destroy)(netstackid_t, void *))
151*f4b3ec61Sdh155122 {
152*f4b3ec61Sdh155122 	netstack_t *ns;
153*f4b3ec61Sdh155122 
154*f4b3ec61Sdh155122 	ASSERT(netstack_initialized);
155*f4b3ec61Sdh155122 	ASSERT(moduleid >= 0 && moduleid < NS_MAX);
156*f4b3ec61Sdh155122 	ASSERT(module_create != NULL);
157*f4b3ec61Sdh155122 
158*f4b3ec61Sdh155122 	mutex_enter(&netstack_g_lock);
159*f4b3ec61Sdh155122 	ASSERT(ns_reg[moduleid].nr_create == NULL);
160*f4b3ec61Sdh155122 	ASSERT(ns_reg[moduleid].nr_flags == 0);
161*f4b3ec61Sdh155122 	ns_reg[moduleid].nr_create = module_create;
162*f4b3ec61Sdh155122 	ns_reg[moduleid].nr_shutdown = module_shutdown;
163*f4b3ec61Sdh155122 	ns_reg[moduleid].nr_destroy = module_destroy;
164*f4b3ec61Sdh155122 	ns_reg[moduleid].nr_flags = NRF_REGISTERED;
165*f4b3ec61Sdh155122 
166*f4b3ec61Sdh155122 	/*
167*f4b3ec61Sdh155122 	 * Determine the set of stacks that exist before we drop the lock.
168*f4b3ec61Sdh155122 	 * Set CREATE_NEEDED for each of those.
169*f4b3ec61Sdh155122 	 * netstacks which have been deleted will have NSS_CREATE_COMPLETED
170*f4b3ec61Sdh155122 	 * set, but check NSF_CLOSING to be sure.
171*f4b3ec61Sdh155122 	 */
172*f4b3ec61Sdh155122 	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
173*f4b3ec61Sdh155122 		mutex_enter(&ns->netstack_lock);
174*f4b3ec61Sdh155122 		if (!(ns->netstack_flags & NSF_CLOSING) &&
175*f4b3ec61Sdh155122 		    (ns->netstack_m_state[moduleid] & NSS_CREATE_ALL) == 0) {
176*f4b3ec61Sdh155122 			ns->netstack_m_state[moduleid] |= NSS_CREATE_NEEDED;
177*f4b3ec61Sdh155122 			DTRACE_PROBE2(netstack__create__needed,
178*f4b3ec61Sdh155122 			    netstack_t *, ns, int, moduleid);
179*f4b3ec61Sdh155122 		}
180*f4b3ec61Sdh155122 		mutex_exit(&ns->netstack_lock);
181*f4b3ec61Sdh155122 	}
182*f4b3ec61Sdh155122 	mutex_exit(&netstack_g_lock);
183*f4b3ec61Sdh155122 
184*f4b3ec61Sdh155122 	/*
185*f4b3ec61Sdh155122 	 * Call the create function for each stack that has CREATE_NEEDED.
186*f4b3ec61Sdh155122 	 * Set CREATE_INPROGRESS, drop lock, and after done,
187*f4b3ec61Sdh155122 	 * set CREATE_COMPLETE
188*f4b3ec61Sdh155122 	 */
189*f4b3ec61Sdh155122 	netstack_do_create();
190*f4b3ec61Sdh155122 }
191*f4b3ec61Sdh155122 
/*
 * Undo netstack_register() for a module that is being unloaded.
 * Runs the module's shutdown and destroy callbacks on every existing
 * stack instance, then clears the per-stack module state and the
 * registry slot so the same moduleid can be registered again later.
 */
void
netstack_unregister(int moduleid)
{
	netstack_t *ns;

	ASSERT(moduleid >= 0 && moduleid < NS_MAX);

	/* Must currently be registered */
	ASSERT(ns_reg[moduleid].nr_create != NULL);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);

	mutex_enter(&netstack_g_lock);
	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set SHUTDOWN_NEEDED and DESTROY_NEEDED for each of those.
	 */
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		mutex_enter(&ns->netstack_lock);
		/* Only flag what was fully created and not already begun */
		if (ns_reg[moduleid].nr_shutdown != NULL &&
		    (ns->netstack_m_state[moduleid] & NSS_CREATE_COMPLETED) &&
		    (ns->netstack_m_state[moduleid] & NSS_SHUTDOWN_ALL) == 0) {
			ns->netstack_m_state[moduleid] |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, moduleid);
		}
		if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) &&
		    ns_reg[moduleid].nr_destroy != NULL &&
		    (ns->netstack_m_state[moduleid] & NSS_CREATE_COMPLETED) &&
		    (ns->netstack_m_state[moduleid] & NSS_DESTROY_ALL) == 0) {
			ns->netstack_m_state[moduleid] |= NSS_DESTROY_NEEDED;
			DTRACE_PROBE2(netstack__destroy__needed,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}
	mutex_exit(&netstack_g_lock);

	/* Apply the flags set above; these run the module callbacks */
	netstack_do_shutdown();
	netstack_do_destroy();

	/*
	 * Clear the netstack_m_state so that we can handle this module
	 * being loaded again.
	 */
	mutex_enter(&netstack_g_lock);
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		mutex_enter(&ns->netstack_lock);
		if (ns->netstack_m_state[moduleid] & NSS_DESTROY_COMPLETED) {
			ns->netstack_m_state[moduleid] = 0;
			DTRACE_PROBE2(netstack__destroy__done,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}

	/* Finally free up the registry slot for reuse */
	ns_reg[moduleid].nr_create = NULL;
	ns_reg[moduleid].nr_shutdown = NULL;
	ns_reg[moduleid].nr_destroy = NULL;
	ns_reg[moduleid].nr_flags = 0;
	mutex_exit(&netstack_g_lock);
}
252*f4b3ec61Sdh155122 
253*f4b3ec61Sdh155122 /*
254*f4b3ec61Sdh155122  * Lookup and/or allocate a netstack for this zone.
255*f4b3ec61Sdh155122  */
256*f4b3ec61Sdh155122 static void *
257*f4b3ec61Sdh155122 netstack_zone_create(zoneid_t zoneid)
258*f4b3ec61Sdh155122 {
259*f4b3ec61Sdh155122 	netstackid_t stackid;
260*f4b3ec61Sdh155122 	netstack_t *ns;
261*f4b3ec61Sdh155122 	netstack_t **nsp;
262*f4b3ec61Sdh155122 	zone_t	*zone;
263*f4b3ec61Sdh155122 	int i;
264*f4b3ec61Sdh155122 
265*f4b3ec61Sdh155122 	ASSERT(netstack_initialized);
266*f4b3ec61Sdh155122 
267*f4b3ec61Sdh155122 	zone = zone_find_by_id_nolock(zoneid);
268*f4b3ec61Sdh155122 	ASSERT(zone != NULL);
269*f4b3ec61Sdh155122 
270*f4b3ec61Sdh155122 	if (zone->zone_flags & ZF_NET_EXCL) {
271*f4b3ec61Sdh155122 		stackid = zoneid;
272*f4b3ec61Sdh155122 	} else {
273*f4b3ec61Sdh155122 		/* Look for the stack instance for the global */
274*f4b3ec61Sdh155122 		stackid = GLOBAL_NETSTACKID;
275*f4b3ec61Sdh155122 	}
276*f4b3ec61Sdh155122 
277*f4b3ec61Sdh155122 	/* Allocate even if it isn't needed; simplifies locking */
278*f4b3ec61Sdh155122 	ns = (netstack_t *)kmem_zalloc(sizeof (netstack_t), KM_SLEEP);
279*f4b3ec61Sdh155122 
280*f4b3ec61Sdh155122 	/* Look if there is a matching stack instance */
281*f4b3ec61Sdh155122 	mutex_enter(&netstack_g_lock);
282*f4b3ec61Sdh155122 	for (nsp = &netstack_head; *nsp != NULL;
283*f4b3ec61Sdh155122 	    nsp = &((*nsp)->netstack_next)) {
284*f4b3ec61Sdh155122 		if ((*nsp)->netstack_stackid == stackid) {
285*f4b3ec61Sdh155122 			/*
286*f4b3ec61Sdh155122 			 * Should never find a pre-existing exclusive stack
287*f4b3ec61Sdh155122 			 */
288*f4b3ec61Sdh155122 			ASSERT(stackid == GLOBAL_NETSTACKID);
289*f4b3ec61Sdh155122 			kmem_free(ns, sizeof (netstack_t));
290*f4b3ec61Sdh155122 			ns = *nsp;
291*f4b3ec61Sdh155122 			mutex_enter(&ns->netstack_lock);
292*f4b3ec61Sdh155122 			ns->netstack_numzones++;
293*f4b3ec61Sdh155122 			mutex_exit(&ns->netstack_lock);
294*f4b3ec61Sdh155122 			mutex_exit(&netstack_g_lock);
295*f4b3ec61Sdh155122 			DTRACE_PROBE1(netstack__inc__numzones,
296*f4b3ec61Sdh155122 			    netstack_t *, ns);
297*f4b3ec61Sdh155122 			/* Record that we have a new shared stack zone */
298*f4b3ec61Sdh155122 			netstack_shared_zone_add(zoneid);
299*f4b3ec61Sdh155122 			zone->zone_netstack = ns;
300*f4b3ec61Sdh155122 			return (ns);
301*f4b3ec61Sdh155122 		}
302*f4b3ec61Sdh155122 	}
303*f4b3ec61Sdh155122 	/* Not found */
304*f4b3ec61Sdh155122 	mutex_init(&ns->netstack_lock, NULL, MUTEX_DEFAULT, NULL);
305*f4b3ec61Sdh155122 	ns->netstack_stackid = zoneid;
306*f4b3ec61Sdh155122 	ns->netstack_numzones = 1;
307*f4b3ec61Sdh155122 	ns->netstack_refcnt = 1; /* Decremented by netstack_zone_destroy */
308*f4b3ec61Sdh155122 	ns->netstack_flags = NSF_UNINIT;
309*f4b3ec61Sdh155122 	*nsp = ns;
310*f4b3ec61Sdh155122 	zone->zone_netstack = ns;
311*f4b3ec61Sdh155122 
312*f4b3ec61Sdh155122 	/*
313*f4b3ec61Sdh155122 	 * Determine the set of module create functions that need to be
314*f4b3ec61Sdh155122 	 * called before we drop the lock.
315*f4b3ec61Sdh155122 	 */
316*f4b3ec61Sdh155122 	for (i = 0; i < NS_MAX; i++) {
317*f4b3ec61Sdh155122 		mutex_enter(&ns->netstack_lock);
318*f4b3ec61Sdh155122 		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
319*f4b3ec61Sdh155122 		    (ns->netstack_m_state[i] & NSS_CREATE_ALL) == 0) {
320*f4b3ec61Sdh155122 			ns->netstack_m_state[i] |= NSS_CREATE_NEEDED;
321*f4b3ec61Sdh155122 			DTRACE_PROBE2(netstack__create__needed,
322*f4b3ec61Sdh155122 			    netstack_t *, ns, int, i);
323*f4b3ec61Sdh155122 		}
324*f4b3ec61Sdh155122 		mutex_exit(&ns->netstack_lock);
325*f4b3ec61Sdh155122 	}
326*f4b3ec61Sdh155122 	mutex_exit(&netstack_g_lock);
327*f4b3ec61Sdh155122 
328*f4b3ec61Sdh155122 	netstack_do_create();
329*f4b3ec61Sdh155122 
330*f4b3ec61Sdh155122 	mutex_enter(&ns->netstack_lock);
331*f4b3ec61Sdh155122 	ns->netstack_flags &= ~NSF_UNINIT;
332*f4b3ec61Sdh155122 	mutex_exit(&ns->netstack_lock);
333*f4b3ec61Sdh155122 
334*f4b3ec61Sdh155122 	return (ns);
335*f4b3ec61Sdh155122 }
336*f4b3ec61Sdh155122 
337*f4b3ec61Sdh155122 /* ARGSUSED */
338*f4b3ec61Sdh155122 static void
339*f4b3ec61Sdh155122 netstack_zone_shutdown(zoneid_t zoneid, void *arg)
340*f4b3ec61Sdh155122 {
341*f4b3ec61Sdh155122 	netstack_t *ns = (netstack_t *)arg;
342*f4b3ec61Sdh155122 	int i;
343*f4b3ec61Sdh155122 
344*f4b3ec61Sdh155122 	ASSERT(arg != NULL);
345*f4b3ec61Sdh155122 
346*f4b3ec61Sdh155122 	mutex_enter(&ns->netstack_lock);
347*f4b3ec61Sdh155122 	ASSERT(ns->netstack_numzones > 0);
348*f4b3ec61Sdh155122 	if (ns->netstack_numzones != 1) {
349*f4b3ec61Sdh155122 		/* Stack instance being used by other zone */
350*f4b3ec61Sdh155122 		mutex_exit(&ns->netstack_lock);
351*f4b3ec61Sdh155122 		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
352*f4b3ec61Sdh155122 		return;
353*f4b3ec61Sdh155122 	}
354*f4b3ec61Sdh155122 	mutex_exit(&ns->netstack_lock);
355*f4b3ec61Sdh155122 
356*f4b3ec61Sdh155122 	mutex_enter(&netstack_g_lock);
357*f4b3ec61Sdh155122 	/*
358*f4b3ec61Sdh155122 	 * Determine the set of stacks that exist before we drop the lock.
359*f4b3ec61Sdh155122 	 * Set SHUTDOWN_NEEDED for each of those.
360*f4b3ec61Sdh155122 	 */
361*f4b3ec61Sdh155122 	for (i = 0; i < NS_MAX; i++) {
362*f4b3ec61Sdh155122 		mutex_enter(&ns->netstack_lock);
363*f4b3ec61Sdh155122 		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
364*f4b3ec61Sdh155122 		    ns_reg[i].nr_shutdown != NULL &&
365*f4b3ec61Sdh155122 		    (ns->netstack_m_state[i] & NSS_CREATE_COMPLETED) &&
366*f4b3ec61Sdh155122 		    (ns->netstack_m_state[i] & NSS_SHUTDOWN_ALL) == 0) {
367*f4b3ec61Sdh155122 			ns->netstack_m_state[i] |= NSS_SHUTDOWN_NEEDED;
368*f4b3ec61Sdh155122 			DTRACE_PROBE2(netstack__shutdown__needed,
369*f4b3ec61Sdh155122 			    netstack_t *, ns, int, i);
370*f4b3ec61Sdh155122 		}
371*f4b3ec61Sdh155122 		mutex_exit(&ns->netstack_lock);
372*f4b3ec61Sdh155122 	}
373*f4b3ec61Sdh155122 	mutex_exit(&netstack_g_lock);
374*f4b3ec61Sdh155122 
375*f4b3ec61Sdh155122 	/* Call the shutdown function for all registered modules */
376*f4b3ec61Sdh155122 	netstack_do_shutdown();
377*f4b3ec61Sdh155122 }
378*f4b3ec61Sdh155122 
379*f4b3ec61Sdh155122 /*
380*f4b3ec61Sdh155122  * Common routine to release a zone.
381*f4b3ec61Sdh155122  * If this was the last zone using the stack instance then prepare to
382*f4b3ec61Sdh155122  * have the refcnt dropping to zero free the zone.
383*f4b3ec61Sdh155122  */
384*f4b3ec61Sdh155122 /* ARGSUSED */
385*f4b3ec61Sdh155122 static void
386*f4b3ec61Sdh155122 netstack_zone_destroy(zoneid_t zoneid, void *arg)
387*f4b3ec61Sdh155122 {
388*f4b3ec61Sdh155122 	netstack_t *ns = (netstack_t *)arg;
389*f4b3ec61Sdh155122 
390*f4b3ec61Sdh155122 	ASSERT(arg != NULL);
391*f4b3ec61Sdh155122 
392*f4b3ec61Sdh155122 	mutex_enter(&ns->netstack_lock);
393*f4b3ec61Sdh155122 	ASSERT(ns->netstack_numzones > 0);
394*f4b3ec61Sdh155122 	ns->netstack_numzones--;
395*f4b3ec61Sdh155122 	if (ns->netstack_numzones != 0) {
396*f4b3ec61Sdh155122 		/* Stack instance being used by other zone */
397*f4b3ec61Sdh155122 		mutex_exit(&ns->netstack_lock);
398*f4b3ec61Sdh155122 		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
399*f4b3ec61Sdh155122 		/* Record that we a shared stack zone has gone away */
400*f4b3ec61Sdh155122 		netstack_shared_zone_remove(zoneid);
401*f4b3ec61Sdh155122 		return;
402*f4b3ec61Sdh155122 	}
403*f4b3ec61Sdh155122 	/*
404*f4b3ec61Sdh155122 	 * Set CLOSING so that netstack_find_by will not find it
405*f4b3ec61Sdh155122 	 * and decrement the reference count.
406*f4b3ec61Sdh155122 	 */
407*f4b3ec61Sdh155122 	ns->netstack_flags |= NSF_CLOSING;
408*f4b3ec61Sdh155122 	mutex_exit(&ns->netstack_lock);
409*f4b3ec61Sdh155122 	DTRACE_PROBE1(netstack__dec__numzones, netstack_t *, ns);
410*f4b3ec61Sdh155122 	/* No other thread can call zone_destroy for this stack */
411*f4b3ec61Sdh155122 
412*f4b3ec61Sdh155122 	/*
413*f4b3ec61Sdh155122 	 * Decrease refcnt to account for the one in netstack_zone_init()
414*f4b3ec61Sdh155122 	 */
415*f4b3ec61Sdh155122 	netstack_rele(ns);
416*f4b3ec61Sdh155122 }
417*f4b3ec61Sdh155122 
/*
 * Called when the reference count drops to zero.
 * Call the destroy functions for each registered module (running any
 * shutdown callbacks first if they were not already run), which frees
 * the per-module state hanging off this netstack_t.
 */
static void
netstack_stack_inactive(netstack_t *ns)
{
	int i;

	mutex_enter(&netstack_g_lock);
	/*
	 * If the shutdown callback wasn't called earlier (e.g., if this is
	 * a netstack shared between multiple zones), then we call it now.
	 */
	for (i = 0; i < NS_MAX; i++) {
		mutex_enter(&ns->netstack_lock);
		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_shutdown != NULL &&
		    (ns->netstack_m_state[i] & NSS_CREATE_COMPLETED) &&
		    (ns->netstack_m_state[i] & NSS_SHUTDOWN_ALL) == 0) {
			ns->netstack_m_state[i] |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, i);
		}
		mutex_exit(&ns->netstack_lock);
	}
	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set DESTROY_NEEDED for each of those.
	 */
	for (i = 0; i < NS_MAX; i++) {
		mutex_enter(&ns->netstack_lock);
		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_destroy != NULL &&
		    (ns->netstack_m_state[i] & NSS_CREATE_COMPLETED) &&
		    (ns->netstack_m_state[i] & NSS_DESTROY_ALL) == 0) {
			ns->netstack_m_state[i] |= NSS_DESTROY_NEEDED;
			DTRACE_PROBE2(netstack__destroy__needed,
			    netstack_t *, ns, int, i);
		}
		mutex_exit(&ns->netstack_lock);
	}
	mutex_exit(&netstack_g_lock);

	/* Apply the SHUTDOWN_NEEDED/DESTROY_NEEDED flags set above */
	netstack_do_shutdown();
	netstack_do_destroy();
}
465*f4b3ec61Sdh155122 
/*
 * Call the create function for the ns and moduleid if CREATE_NEEDED
 * is set.
 * When it calls it, it drops the netstack_lock held by the caller,
 * and returns true to tell the caller it needs to re-evaluate the
 * state (the stack list may have changed while the lock was dropped).
 */
static boolean_t
netstack_apply_create(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	void *result;
	netstackid_t stackid;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);
	if (ns->netstack_m_state[moduleid] & NSS_CREATE_NEEDED) {
		/* Claim the work: NEEDED -> INPROGRESS, under the lock */
		ns->netstack_m_state[moduleid] &= ~NSS_CREATE_NEEDED;
		ns->netstack_m_state[moduleid] |= NSS_CREATE_INPROGRESS;
		DTRACE_PROBE2(netstack__create__inprogress,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		/* Drop the caller's lock; the create callback may block */
		mutex_exit(lockp);

		ASSERT(ns_reg[moduleid].nr_create != NULL);
		stackid = ns->netstack_stackid;
		DTRACE_PROBE2(netstack__create__start,
		    netstackid_t, stackid,
		    netstack_t *, ns);
		result = (ns_reg[moduleid].nr_create)(stackid, ns);
		DTRACE_PROBE2(netstack__create__end,
		    void *, result, netstack_t *, ns);

		/* Create callbacks may not fail */
		ASSERT(result != NULL);
		mutex_enter(&ns->netstack_lock);
		ns->netstack_modules[moduleid] = result;
		ns->netstack_m_state[moduleid] &= ~NSS_CREATE_INPROGRESS;
		ns->netstack_m_state[moduleid] |= NSS_CREATE_COMPLETED;
		DTRACE_PROBE2(netstack__create__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (B_TRUE);
	} else {
		/* Nothing to do; the caller's lockp is still held */
		mutex_exit(&ns->netstack_lock);
		return (B_FALSE);
	}
}
512*f4b3ec61Sdh155122 
/*
 * Call the shutdown function for the ns and moduleid if SHUTDOWN_NEEDED
 * is set.
 * When it calls it, it drops the netstack_lock held by the caller,
 * and returns true to tell the caller it needs to re-evaluate the
 * state (the stack list may have changed while the lock was dropped).
 */
static boolean_t
netstack_apply_shutdown(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	netstackid_t stackid;
	void * netstack_module;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);
	if (ns->netstack_m_state[moduleid] & NSS_SHUTDOWN_NEEDED) {
		/* Claim the work: NEEDED -> INPROGRESS, under the lock */
		ns->netstack_m_state[moduleid] &= ~NSS_SHUTDOWN_NEEDED;
		ns->netstack_m_state[moduleid] |= NSS_SHUTDOWN_INPROGRESS;
		DTRACE_PROBE2(netstack__shutdown__inprogress,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		/* Drop the caller's lock; the shutdown callback may block */
		mutex_exit(lockp);

		ASSERT(ns_reg[moduleid].nr_shutdown != NULL);
		stackid = ns->netstack_stackid;
		netstack_module = ns->netstack_modules[moduleid];
		DTRACE_PROBE2(netstack__shutdown__start,
		    netstackid_t, stackid,
		    void *, netstack_module);
		(ns_reg[moduleid].nr_shutdown)(stackid, netstack_module);
		DTRACE_PROBE1(netstack__shutdown__end,
		    netstack_t *, ns);

		mutex_enter(&ns->netstack_lock);
		ns->netstack_m_state[moduleid] &= ~NSS_SHUTDOWN_INPROGRESS;
		ns->netstack_m_state[moduleid] |= NSS_SHUTDOWN_COMPLETED;
		DTRACE_PROBE2(netstack__shutdown__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (B_TRUE);
	} else {
		/* Nothing to do; the caller's lockp is still held */
		mutex_exit(&ns->netstack_lock);
		return (B_FALSE);
	}
}
558*f4b3ec61Sdh155122 
/*
 * Call the destroy function for the ns and moduleid if DESTROY_NEEDED
 * is set.
 * When it calls it, it drops the netstack_lock held by the caller,
 * and returns true to tell the caller it needs to re-evaluate the
 * state (the stack list may have changed while the lock was dropped).
 */
static boolean_t
netstack_apply_destroy(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	netstackid_t stackid;
	void * netstack_module;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);
	if (ns->netstack_m_state[moduleid] & NSS_DESTROY_NEEDED) {
		/* Claim the work: NEEDED -> INPROGRESS, under the lock */
		ns->netstack_m_state[moduleid] &= ~NSS_DESTROY_NEEDED;
		ns->netstack_m_state[moduleid] |= NSS_DESTROY_INPROGRESS;
		DTRACE_PROBE2(netstack__destroy__inprogress,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		/* Drop the caller's lock; the destroy callback may block */
		mutex_exit(lockp);

		/* XXX race against unregister? */
		ASSERT(ns_reg[moduleid].nr_destroy != NULL);
		stackid = ns->netstack_stackid;
		netstack_module = ns->netstack_modules[moduleid];
		DTRACE_PROBE2(netstack__destroy__start,
		    netstackid_t, stackid,
		    void *, netstack_module);
		(ns_reg[moduleid].nr_destroy)(stackid, netstack_module);
		DTRACE_PROBE1(netstack__destroy__end,
		    netstack_t *, ns);

		mutex_enter(&ns->netstack_lock);
		/* The module's per-stack state is gone; clear the pointer */
		ns->netstack_modules[moduleid] = NULL;
		ns->netstack_m_state[moduleid] &= ~NSS_DESTROY_INPROGRESS;
		ns->netstack_m_state[moduleid] |= NSS_DESTROY_COMPLETED;
		DTRACE_PROBE2(netstack__destroy__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (B_TRUE);
	} else {
		/* Nothing to do; the caller's lockp is still held */
		mutex_exit(&ns->netstack_lock);
		return (B_FALSE);
	}
}
606*f4b3ec61Sdh155122 
/*
 * Walk all netstacks on the list at *headp and invoke applyfn for each
 * (netstack, moduleid) pair, moduleids in increasing order.
 *
 * Entered (and returns) with lockp held.  applyfn returns B_TRUE when it
 * had to drop lockp to do its work; in that case we reacquire it here and,
 * after finishing this netstack's modules, restart the walk from *headp
 * since the list may have changed while the lock was dropped.
 */
static void
apply_loop(netstack_t **headp, kmutex_t *lockp,
    boolean_t (*applyfn)(kmutex_t *, netstack_t *, int moduleid))
{
	netstack_t *ns;
	int i;
	boolean_t lock_dropped, result;

	lock_dropped = B_FALSE;
	ns = *headp;
	while (ns != NULL) {
		for (i = 0; i < NS_MAX; i++) {
			result = (applyfn)(lockp, ns, i);
			if (result) {
#ifdef NS_DEBUG
				(void) printf("netstack_do_apply: "
				    "LD for %p/%d, %d\n",
				    (void *)ns, ns->netstack_stackid, i);
#endif
				/* applyfn released lockp; retake it */
				lock_dropped = B_TRUE;
				mutex_enter(lockp);
			}
		}
		/*
		 * If at least one applyfn call caused lockp to be dropped,
		 * then we don't follow netstack_next after reacquiring the
		 * lock, even if it is possible to do so without any hazards.
		 * This is because we want the design to allow for the list of
		 * netstacks threaded by netstack_next to change in any
		 * arbitrary way during the time the 'lockp' was dropped.
		 *
		 * It is safe to restart the loop at *headp since
		 * the applyfn changes netstack_m_state as it processes
		 * things, so a subsequent pass through will have no
		 * effect in applyfn, hence the loop will terminate
		 * in at worst O(N^2).
		 */
		if (lock_dropped) {
#ifdef NS_DEBUG
			(void) printf("netstack_do_apply: "
			    "Lock Dropped for %p/%d, %d\n",
			    (void *)ns, ns->netstack_stackid, i);
#endif
			lock_dropped = B_FALSE;
			ns = *headp;
		} else {
			ns = ns->netstack_next;
		}
	}
}
657*f4b3ec61Sdh155122 
658*f4b3ec61Sdh155122 /* Like above, but in the reverse order of moduleids */
/*
 * Same walk as apply_loop(), but visits the moduleids in decreasing
 * order.  Used for destroys, where later-registered modules must be
 * handled before the modules they depend on.
 *
 * Entered (and returns) with lockp held; see apply_loop() for the
 * lock-drop/restart protocol.
 */
static void
apply_loop_reverse(netstack_t **headp, kmutex_t *lockp,
    boolean_t (*applyfn)(kmutex_t *, netstack_t *, int moduleid))
{
	netstack_t *ns;
	int i;
	boolean_t lock_dropped, result;

	lock_dropped = B_FALSE;
	ns = *headp;
	while (ns != NULL) {
		for (i = NS_MAX-1; i >= 0; i--) {
			result = (applyfn)(lockp, ns, i);
			if (result) {
#ifdef NS_DEBUG
				(void) printf("netstack_do_apply: "
				    "LD for %p/%d, %d\n",
				    (void *)ns, ns->netstack_stackid, i);
#endif
				/* applyfn released lockp; retake it */
				lock_dropped = B_TRUE;
				mutex_enter(lockp);
			}
		}
		/*
		 * If at least one applyfn call caused lockp to be dropped,
		 * then we don't follow netstack_next after reacquiring the
		 * lock, even if it is possible to do so without any hazards.
		 * This is because we want the design to allow for the list of
		 * netstacks threaded by netstack_next to change in any
		 * arbitrary way during the time the 'lockp' was dropped.
		 *
		 * It is safe to restart the loop at *headp since
		 * the applyfn changes netstack_m_state as it processes
		 * things, so a subsequent pass through will have no
		 * effect in applyfn, hence the loop will terminate
		 * in at worst O(N^2).
		 */
		if (lock_dropped) {
#ifdef NS_DEBUG
			(void) printf("netstack_do_apply: "
			    "Lock Dropped for %p/%d, %d\n",
			    (void *)ns, ns->netstack_stackid, i);
#endif
			lock_dropped = B_FALSE;
			ns = *headp;
		} else {
			ns = ns->netstack_next;
		}
	}
}
709*f4b3ec61Sdh155122 
710*f4b3ec61Sdh155122 /*
711*f4b3ec61Sdh155122  * Apply a function to all module/netstack combinations.
712*f4b3ec61Sdh155122  * The applyfn returns true if it had dropped the locks.
713*f4b3ec61Sdh155122  */
714*f4b3ec61Sdh155122 static void
715*f4b3ec61Sdh155122 netstack_do_apply(int reverse,
716*f4b3ec61Sdh155122     boolean_t (*applyfn)(kmutex_t *, netstack_t *, int moduleid))
717*f4b3ec61Sdh155122 {
718*f4b3ec61Sdh155122 	mutex_enter(&netstack_g_lock);
719*f4b3ec61Sdh155122 	if (reverse)
720*f4b3ec61Sdh155122 		apply_loop_reverse(&netstack_head, &netstack_g_lock, applyfn);
721*f4b3ec61Sdh155122 	else
722*f4b3ec61Sdh155122 		apply_loop(&netstack_head, &netstack_g_lock, applyfn);
723*f4b3ec61Sdh155122 	mutex_exit(&netstack_g_lock);
724*f4b3ec61Sdh155122 }
725*f4b3ec61Sdh155122 
726*f4b3ec61Sdh155122 /*
727*f4b3ec61Sdh155122  * Run the create function for all modules x stack combinations
728*f4b3ec61Sdh155122  * that have NSS_CREATE_NEEDED set.
729*f4b3ec61Sdh155122  *
730*f4b3ec61Sdh155122  * Call the create function for each stack that has CREATE_NEEDED.
731*f4b3ec61Sdh155122  * Set CREATE_INPROGRESS, drop lock, and after done,
732*f4b3ec61Sdh155122  * set CREATE_COMPLETE
733*f4b3ec61Sdh155122  */
static void
netstack_do_create(void)
{
	/* B_FALSE: walk the moduleids in increasing order */
	netstack_do_apply(B_FALSE, netstack_apply_create);
}
739*f4b3ec61Sdh155122 
740*f4b3ec61Sdh155122 /*
741*f4b3ec61Sdh155122  * Run the shutdown function for all modules x stack combinations
742*f4b3ec61Sdh155122  * that have NSS_SHUTDOWN_NEEDED set.
743*f4b3ec61Sdh155122  *
744*f4b3ec61Sdh155122  * Call the shutdown function for each stack that has SHUTDOWN_NEEDED.
745*f4b3ec61Sdh155122  * Set SHUTDOWN_INPROGRESS, drop lock, and after done,
746*f4b3ec61Sdh155122  * set SHUTDOWN_COMPLETE
747*f4b3ec61Sdh155122  */
static void
netstack_do_shutdown(void)
{
	/* B_FALSE: walk the moduleids in increasing order */
	netstack_do_apply(B_FALSE, netstack_apply_shutdown);
}
753*f4b3ec61Sdh155122 
754*f4b3ec61Sdh155122 /*
755*f4b3ec61Sdh155122  * Run the destroy function for all modules x stack combinations
756*f4b3ec61Sdh155122  * that have NSS_DESTROY_NEEDED set.
757*f4b3ec61Sdh155122  *
758*f4b3ec61Sdh155122  * Call the destroy function for each stack that has DESTROY_NEEDED.
759*f4b3ec61Sdh155122  * Set DESTROY_INPROGRESS, drop lock, and after done,
760*f4b3ec61Sdh155122  * set DESTROY_COMPLETE
761*f4b3ec61Sdh155122  *
762*f4b3ec61Sdh155122  * Since a netstack_t is never reused (when a zone is rebooted it gets
763*f4b3ec61Sdh155122  * a new zoneid == netstackid i.e. a new netstack_t is allocated) we leave
764*f4b3ec61Sdh155122  * netstack_m_state the way it is i.e. with NSS_DESTROY_COMPLETED set.
765*f4b3ec61Sdh155122  */
static void
netstack_do_destroy(void)
{
	/*
	 * Have to walk the moduleids in reverse order since some
	 * modules make implicit assumptions about the order
	 * (later-registered moduleids are destroyed first).
	 */
	netstack_do_apply(B_TRUE, netstack_apply_destroy);
}
775*f4b3ec61Sdh155122 
776*f4b3ec61Sdh155122 /*
777*f4b3ec61Sdh155122  * Get the stack instance used in caller's zone.
778*f4b3ec61Sdh155122  * Increases the reference count, caller must do a netstack_rele.
779*f4b3ec61Sdh155122  * It can't be called after zone_destroy() has started.
780*f4b3ec61Sdh155122  */
781*f4b3ec61Sdh155122 static netstack_t *
782*f4b3ec61Sdh155122 netstack_get_current(void)
783*f4b3ec61Sdh155122 {
784*f4b3ec61Sdh155122 	netstack_t *ns;
785*f4b3ec61Sdh155122 
786*f4b3ec61Sdh155122 	ns = curproc->p_zone->zone_netstack;
787*f4b3ec61Sdh155122 	ASSERT(ns != NULL);
788*f4b3ec61Sdh155122 	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
789*f4b3ec61Sdh155122 		return (NULL);
790*f4b3ec61Sdh155122 
791*f4b3ec61Sdh155122 	netstack_hold(ns);
792*f4b3ec61Sdh155122 
793*f4b3ec61Sdh155122 	return (ns);
794*f4b3ec61Sdh155122 }
795*f4b3ec61Sdh155122 
796*f4b3ec61Sdh155122 /*
797*f4b3ec61Sdh155122  * Find a stack instance given the cred.
798*f4b3ec61Sdh155122  * This is used by the modules to potentially allow for a future when
799*f4b3ec61Sdh155122  * something other than the zoneid is used to determine the stack.
800*f4b3ec61Sdh155122  */
801*f4b3ec61Sdh155122 netstack_t *
802*f4b3ec61Sdh155122 netstack_find_by_cred(const cred_t *cr)
803*f4b3ec61Sdh155122 {
804*f4b3ec61Sdh155122 	zoneid_t zoneid = crgetzoneid(cr);
805*f4b3ec61Sdh155122 
806*f4b3ec61Sdh155122 	/* Handle the case when cr_zone is NULL */
807*f4b3ec61Sdh155122 	if (zoneid == (zoneid_t)-1)
808*f4b3ec61Sdh155122 		zoneid = GLOBAL_ZONEID;
809*f4b3ec61Sdh155122 
810*f4b3ec61Sdh155122 	/* For performance ... */
811*f4b3ec61Sdh155122 	if (curproc->p_zone->zone_id == zoneid)
812*f4b3ec61Sdh155122 		return (netstack_get_current());
813*f4b3ec61Sdh155122 	else
814*f4b3ec61Sdh155122 		return (netstack_find_by_zoneid(zoneid));
815*f4b3ec61Sdh155122 }
816*f4b3ec61Sdh155122 
817*f4b3ec61Sdh155122 /*
818*f4b3ec61Sdh155122  * Find a stack instance given the zoneid.
819*f4b3ec61Sdh155122  * Increases the reference count if found; caller must do a
820*f4b3ec61Sdh155122  * netstack_rele().
821*f4b3ec61Sdh155122  *
822*f4b3ec61Sdh155122  * If there is no exact match then assume the shared stack instance
823*f4b3ec61Sdh155122  * matches.
824*f4b3ec61Sdh155122  *
 * Skip the uninitialized ones.
826*f4b3ec61Sdh155122  */
827*f4b3ec61Sdh155122 netstack_t *
828*f4b3ec61Sdh155122 netstack_find_by_zoneid(zoneid_t zoneid)
829*f4b3ec61Sdh155122 {
830*f4b3ec61Sdh155122 	netstack_t *ns;
831*f4b3ec61Sdh155122 	zone_t *zone;
832*f4b3ec61Sdh155122 
833*f4b3ec61Sdh155122 	zone = zone_find_by_id(zoneid);
834*f4b3ec61Sdh155122 
835*f4b3ec61Sdh155122 	if (zone == NULL)
836*f4b3ec61Sdh155122 		return (NULL);
837*f4b3ec61Sdh155122 
838*f4b3ec61Sdh155122 	ns = zone->zone_netstack;
839*f4b3ec61Sdh155122 	ASSERT(ns != NULL);
840*f4b3ec61Sdh155122 	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
841*f4b3ec61Sdh155122 		ns = NULL;
842*f4b3ec61Sdh155122 	else
843*f4b3ec61Sdh155122 		netstack_hold(ns);
844*f4b3ec61Sdh155122 
845*f4b3ec61Sdh155122 	zone_rele(zone);
846*f4b3ec61Sdh155122 	return (ns);
847*f4b3ec61Sdh155122 }
848*f4b3ec61Sdh155122 
849*f4b3ec61Sdh155122 /*
850*f4b3ec61Sdh155122  * Find a stack instance given the zoneid.
851*f4b3ec61Sdh155122  * Increases the reference count if found; caller must do a
852*f4b3ec61Sdh155122  * netstack_rele().
853*f4b3ec61Sdh155122  *
854*f4b3ec61Sdh155122  * If there is no exact match then assume the shared stack instance
855*f4b3ec61Sdh155122  * matches.
856*f4b3ec61Sdh155122  *
 * Skip the uninitialized ones.
858*f4b3ec61Sdh155122  *
859*f4b3ec61Sdh155122  * NOTE: The caller must hold zonehash_lock.
860*f4b3ec61Sdh155122  */
861*f4b3ec61Sdh155122 netstack_t *
862*f4b3ec61Sdh155122 netstack_find_by_zoneid_nolock(zoneid_t zoneid)
863*f4b3ec61Sdh155122 {
864*f4b3ec61Sdh155122 	netstack_t *ns;
865*f4b3ec61Sdh155122 	zone_t *zone;
866*f4b3ec61Sdh155122 
867*f4b3ec61Sdh155122 	zone = zone_find_by_id_nolock(zoneid);
868*f4b3ec61Sdh155122 
869*f4b3ec61Sdh155122 	if (zone == NULL)
870*f4b3ec61Sdh155122 		return (NULL);
871*f4b3ec61Sdh155122 
872*f4b3ec61Sdh155122 	ns = zone->zone_netstack;
873*f4b3ec61Sdh155122 	ASSERT(ns != NULL);
874*f4b3ec61Sdh155122 
875*f4b3ec61Sdh155122 	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
876*f4b3ec61Sdh155122 		ns = NULL;
877*f4b3ec61Sdh155122 	else
878*f4b3ec61Sdh155122 		netstack_hold(ns);
879*f4b3ec61Sdh155122 
880*f4b3ec61Sdh155122 	zone_rele(zone);
881*f4b3ec61Sdh155122 	return (ns);
882*f4b3ec61Sdh155122 }
883*f4b3ec61Sdh155122 
884*f4b3ec61Sdh155122 /*
 * Find a stack instance given an exact stackid match.
886*f4b3ec61Sdh155122  * Increases the reference count if found; caller must do a
887*f4b3ec61Sdh155122  * netstack_rele().
888*f4b3ec61Sdh155122  *
889*f4b3ec61Sdh155122  * Skip the unitialized ones.
890*f4b3ec61Sdh155122  */
891*f4b3ec61Sdh155122 netstack_t *
892*f4b3ec61Sdh155122 netstack_find_by_stackid(netstackid_t stackid)
893*f4b3ec61Sdh155122 {
894*f4b3ec61Sdh155122 	netstack_t *ns;
895*f4b3ec61Sdh155122 
896*f4b3ec61Sdh155122 	mutex_enter(&netstack_g_lock);
897*f4b3ec61Sdh155122 	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
898*f4b3ec61Sdh155122 		mutex_enter(&ns->netstack_lock);
899*f4b3ec61Sdh155122 		if (ns->netstack_stackid == stackid &&
900*f4b3ec61Sdh155122 		    !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
901*f4b3ec61Sdh155122 			mutex_exit(&ns->netstack_lock);
902*f4b3ec61Sdh155122 			netstack_hold(ns);
903*f4b3ec61Sdh155122 			mutex_exit(&netstack_g_lock);
904*f4b3ec61Sdh155122 			return (ns);
905*f4b3ec61Sdh155122 		}
906*f4b3ec61Sdh155122 		mutex_exit(&ns->netstack_lock);
907*f4b3ec61Sdh155122 	}
908*f4b3ec61Sdh155122 	mutex_exit(&netstack_g_lock);
909*f4b3ec61Sdh155122 	return (NULL);
910*f4b3ec61Sdh155122 }
911*f4b3ec61Sdh155122 
/*
 * Drop a reference on a netstack (taken by netstack_hold() or one of the
 * netstack_find_*() functions).  When both the reference count and the
 * zone count reach zero, the stack is torn down via
 * netstack_stack_inactive(), unlinked from the global list, and freed.
 */
void
netstack_rele(netstack_t *ns)
{
	netstack_t **nsp;
	boolean_t found;
	int refcnt, numzones;

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_refcnt > 0);
	ns->netstack_refcnt--;
	/*
	 * As we drop the lock additional netstack_rele()s can come in
	 * and decrement the refcnt to zero and free the netstack_t.
	 * Store pointers in local variables and if we were not the last
	 * then don't reference the netstack_t after that.
	 */
	refcnt = ns->netstack_refcnt;
	numzones = ns->netstack_numzones;
	DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
	mutex_exit(&ns->netstack_lock);

	if (refcnt == 0 && numzones == 0) {
		/*
		 * Time to call the destroy functions and free up
		 * the structure
		 */
		netstack_stack_inactive(ns);

		/* Finally remove from list of netstacks */
		mutex_enter(&netstack_g_lock);
		found = B_FALSE;
		for (nsp = &netstack_head; *nsp != NULL;
		    nsp = &(*nsp)->netstack_next) {
			if (*nsp == ns) {
				*nsp = ns->netstack_next;
				ns->netstack_next = NULL;
				found = B_TRUE;
				break;
			}
		}
		/* The stack must still have been on the global list */
		ASSERT(found);
		mutex_exit(&netstack_g_lock);

		ASSERT(ns->netstack_flags & NSF_CLOSING);
		kmem_free(ns, sizeof (*ns));
	}
}
959*f4b3ec61Sdh155122 
/*
 * Take a reference on a netstack, preventing it from being freed.
 * The caller must already have a legal way to reach ns (an existing
 * hold, or a lock that keeps it from going away).  Release the
 * reference with netstack_rele().
 */
void
netstack_hold(netstack_t *ns)
{
	mutex_enter(&ns->netstack_lock);
	ns->netstack_refcnt++;
	ASSERT(ns->netstack_refcnt > 0);	/* catch counter overflow */
	mutex_exit(&ns->netstack_lock);
	DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
}
969*f4b3ec61Sdh155122 
970*f4b3ec61Sdh155122 /*
971*f4b3ec61Sdh155122  * To support kstat_create_netstack() using kstat_zone_add we need
972*f4b3ec61Sdh155122  * to track both
973*f4b3ec61Sdh155122  *  - all zoneids that use the global/shared stack
974*f4b3ec61Sdh155122  *  - all kstats that have been added for the shared stack
975*f4b3ec61Sdh155122  */
976*f4b3ec61Sdh155122 kstat_t *
977*f4b3ec61Sdh155122 kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
978*f4b3ec61Sdh155122     char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
979*f4b3ec61Sdh155122     netstackid_t ks_netstackid)
980*f4b3ec61Sdh155122 {
981*f4b3ec61Sdh155122 	kstat_t *ks;
982*f4b3ec61Sdh155122 
983*f4b3ec61Sdh155122 	if (ks_netstackid == GLOBAL_NETSTACKID) {
984*f4b3ec61Sdh155122 		ks = kstat_create_zone(ks_module, ks_instance, ks_name,
985*f4b3ec61Sdh155122 		    ks_class, ks_type, ks_ndata, ks_flags, GLOBAL_ZONEID);
986*f4b3ec61Sdh155122 		if (ks != NULL)
987*f4b3ec61Sdh155122 			netstack_shared_kstat_add(ks);
988*f4b3ec61Sdh155122 		return (ks);
989*f4b3ec61Sdh155122 	} else {
990*f4b3ec61Sdh155122 		zoneid_t zoneid = ks_netstackid;
991*f4b3ec61Sdh155122 
992*f4b3ec61Sdh155122 		return (kstat_create_zone(ks_module, ks_instance, ks_name,
993*f4b3ec61Sdh155122 			ks_class, ks_type, ks_ndata, ks_flags, zoneid));
994*f4b3ec61Sdh155122 	}
995*f4b3ec61Sdh155122 }
996*f4b3ec61Sdh155122 
997*f4b3ec61Sdh155122 void
998*f4b3ec61Sdh155122 kstat_delete_netstack(kstat_t *ks, netstackid_t ks_netstackid)
999*f4b3ec61Sdh155122 {
1000*f4b3ec61Sdh155122 	if (ks_netstackid == GLOBAL_NETSTACKID) {
1001*f4b3ec61Sdh155122 		netstack_shared_kstat_remove(ks);
1002*f4b3ec61Sdh155122 	}
1003*f4b3ec61Sdh155122 	kstat_delete(ks);
1004*f4b3ec61Sdh155122 }
1005*f4b3ec61Sdh155122 
1006*f4b3ec61Sdh155122 static void
1007*f4b3ec61Sdh155122 netstack_shared_zone_add(zoneid_t zoneid)
1008*f4b3ec61Sdh155122 {
1009*f4b3ec61Sdh155122 	struct shared_zone_list *sz;
1010*f4b3ec61Sdh155122 	struct shared_kstat_list *sk;
1011*f4b3ec61Sdh155122 
1012*f4b3ec61Sdh155122 	sz = (struct shared_zone_list *)kmem_zalloc(sizeof (*sz), KM_SLEEP);
1013*f4b3ec61Sdh155122 	sz->sz_zoneid = zoneid;
1014*f4b3ec61Sdh155122 
1015*f4b3ec61Sdh155122 	/* Insert in list */
1016*f4b3ec61Sdh155122 	mutex_enter(&netstack_shared_lock);
1017*f4b3ec61Sdh155122 	sz->sz_next = netstack_shared_zones;
1018*f4b3ec61Sdh155122 	netstack_shared_zones = sz;
1019*f4b3ec61Sdh155122 
1020*f4b3ec61Sdh155122 	/*
1021*f4b3ec61Sdh155122 	 * Perform kstat_zone_add for each existing shared stack kstat.
1022*f4b3ec61Sdh155122 	 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
1023*f4b3ec61Sdh155122 	 */
1024*f4b3ec61Sdh155122 	for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
1025*f4b3ec61Sdh155122 		kstat_zone_add(sk->sk_kstat, zoneid);
1026*f4b3ec61Sdh155122 	}
1027*f4b3ec61Sdh155122 	mutex_exit(&netstack_shared_lock);
1028*f4b3ec61Sdh155122 }
1029*f4b3ec61Sdh155122 
1030*f4b3ec61Sdh155122 static void
1031*f4b3ec61Sdh155122 netstack_shared_zone_remove(zoneid_t zoneid)
1032*f4b3ec61Sdh155122 {
1033*f4b3ec61Sdh155122 	struct shared_zone_list **szp, *sz;
1034*f4b3ec61Sdh155122 	struct shared_kstat_list *sk;
1035*f4b3ec61Sdh155122 
1036*f4b3ec61Sdh155122 	/* Find in list */
1037*f4b3ec61Sdh155122 	mutex_enter(&netstack_shared_lock);
1038*f4b3ec61Sdh155122 	sz = NULL;
1039*f4b3ec61Sdh155122 	for (szp = &netstack_shared_zones; *szp != NULL;
1040*f4b3ec61Sdh155122 	    szp = &((*szp)->sz_next)) {
1041*f4b3ec61Sdh155122 		if ((*szp)->sz_zoneid == zoneid) {
1042*f4b3ec61Sdh155122 			sz = *szp;
1043*f4b3ec61Sdh155122 			break;
1044*f4b3ec61Sdh155122 		}
1045*f4b3ec61Sdh155122 	}
1046*f4b3ec61Sdh155122 	/* We must find it */
1047*f4b3ec61Sdh155122 	ASSERT(sz != NULL);
1048*f4b3ec61Sdh155122 	*szp = sz->sz_next;
1049*f4b3ec61Sdh155122 	sz->sz_next = NULL;
1050*f4b3ec61Sdh155122 
1051*f4b3ec61Sdh155122 	/*
1052*f4b3ec61Sdh155122 	 * Perform kstat_zone_remove for each existing shared stack kstat.
1053*f4b3ec61Sdh155122 	 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
1054*f4b3ec61Sdh155122 	 */
1055*f4b3ec61Sdh155122 	for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
1056*f4b3ec61Sdh155122 		kstat_zone_remove(sk->sk_kstat, zoneid);
1057*f4b3ec61Sdh155122 	}
1058*f4b3ec61Sdh155122 	mutex_exit(&netstack_shared_lock);
1059*f4b3ec61Sdh155122 
1060*f4b3ec61Sdh155122 	kmem_free(sz, sizeof (*sz));
1061*f4b3ec61Sdh155122 }
1062*f4b3ec61Sdh155122 
1063*f4b3ec61Sdh155122 static void
1064*f4b3ec61Sdh155122 netstack_shared_kstat_add(kstat_t *ks)
1065*f4b3ec61Sdh155122 {
1066*f4b3ec61Sdh155122 	struct shared_zone_list *sz;
1067*f4b3ec61Sdh155122 	struct shared_kstat_list *sk;
1068*f4b3ec61Sdh155122 
1069*f4b3ec61Sdh155122 	sk = (struct shared_kstat_list *)kmem_zalloc(sizeof (*sk), KM_SLEEP);
1070*f4b3ec61Sdh155122 	sk->sk_kstat = ks;
1071*f4b3ec61Sdh155122 
1072*f4b3ec61Sdh155122 	/* Insert in list */
1073*f4b3ec61Sdh155122 	mutex_enter(&netstack_shared_lock);
1074*f4b3ec61Sdh155122 	sk->sk_next = netstack_shared_kstats;
1075*f4b3ec61Sdh155122 	netstack_shared_kstats = sk;
1076*f4b3ec61Sdh155122 
1077*f4b3ec61Sdh155122 	/*
1078*f4b3ec61Sdh155122 	 * Perform kstat_zone_add for each existing shared stack zone.
1079*f4b3ec61Sdh155122 	 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
1080*f4b3ec61Sdh155122 	 */
1081*f4b3ec61Sdh155122 	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1082*f4b3ec61Sdh155122 		kstat_zone_add(ks, sz->sz_zoneid);
1083*f4b3ec61Sdh155122 	}
1084*f4b3ec61Sdh155122 	mutex_exit(&netstack_shared_lock);
1085*f4b3ec61Sdh155122 }
1086*f4b3ec61Sdh155122 
1087*f4b3ec61Sdh155122 static void
1088*f4b3ec61Sdh155122 netstack_shared_kstat_remove(kstat_t *ks)
1089*f4b3ec61Sdh155122 {
1090*f4b3ec61Sdh155122 	struct shared_zone_list *sz;
1091*f4b3ec61Sdh155122 	struct shared_kstat_list **skp, *sk;
1092*f4b3ec61Sdh155122 
1093*f4b3ec61Sdh155122 	/* Find in list */
1094*f4b3ec61Sdh155122 	mutex_enter(&netstack_shared_lock);
1095*f4b3ec61Sdh155122 	sk = NULL;
1096*f4b3ec61Sdh155122 	for (skp = &netstack_shared_kstats; *skp != NULL;
1097*f4b3ec61Sdh155122 	    skp = &((*skp)->sk_next)) {
1098*f4b3ec61Sdh155122 		if ((*skp)->sk_kstat == ks) {
1099*f4b3ec61Sdh155122 			sk = *skp;
1100*f4b3ec61Sdh155122 			break;
1101*f4b3ec61Sdh155122 		}
1102*f4b3ec61Sdh155122 	}
1103*f4b3ec61Sdh155122 	/* Must find it */
1104*f4b3ec61Sdh155122 	ASSERT(sk != NULL);
1105*f4b3ec61Sdh155122 	*skp = sk->sk_next;
1106*f4b3ec61Sdh155122 	sk->sk_next = NULL;
1107*f4b3ec61Sdh155122 
1108*f4b3ec61Sdh155122 	/*
1109*f4b3ec61Sdh155122 	 * Perform kstat_zone_remove for each existing shared stack kstat.
1110*f4b3ec61Sdh155122 	 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
1111*f4b3ec61Sdh155122 	 */
1112*f4b3ec61Sdh155122 	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1113*f4b3ec61Sdh155122 		kstat_zone_remove(ks, sz->sz_zoneid);
1114*f4b3ec61Sdh155122 	}
1115*f4b3ec61Sdh155122 	mutex_exit(&netstack_shared_lock);
1116*f4b3ec61Sdh155122 	kmem_free(sk, sizeof (*sk));
1117*f4b3ec61Sdh155122 }
1118*f4b3ec61Sdh155122 
1119*f4b3ec61Sdh155122 /*
1120*f4b3ec61Sdh155122  * If a zoneid is part of the shared zone, return true
1121*f4b3ec61Sdh155122  */
1122*f4b3ec61Sdh155122 static boolean_t
1123*f4b3ec61Sdh155122 netstack_find_shared_zoneid(zoneid_t zoneid)
1124*f4b3ec61Sdh155122 {
1125*f4b3ec61Sdh155122 	struct shared_zone_list *sz;
1126*f4b3ec61Sdh155122 
1127*f4b3ec61Sdh155122 	mutex_enter(&netstack_shared_lock);
1128*f4b3ec61Sdh155122 	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1129*f4b3ec61Sdh155122 		if (sz->sz_zoneid == zoneid) {
1130*f4b3ec61Sdh155122 			mutex_exit(&netstack_shared_lock);
1131*f4b3ec61Sdh155122 			return (B_TRUE);
1132*f4b3ec61Sdh155122 		}
1133*f4b3ec61Sdh155122 	}
1134*f4b3ec61Sdh155122 	mutex_exit(&netstack_shared_lock);
1135*f4b3ec61Sdh155122 	return (B_FALSE);
1136*f4b3ec61Sdh155122 }
1137*f4b3ec61Sdh155122 
1138*f4b3ec61Sdh155122 /*
1139*f4b3ec61Sdh155122  * Hide the fact that zoneids and netstackids are allocated from
1140*f4b3ec61Sdh155122  * the same space in the current implementation.
1141*f4b3ec61Sdh155122  * XXX could add checks that the stackid/zoneids are valid...
1142*f4b3ec61Sdh155122  */
zoneid_t
netstackid_to_zoneid(netstackid_t stackid)
{
	/* Identity mapping: stackids are allocated from the zoneid space */
	return (stackid);
}
1148*f4b3ec61Sdh155122 
1149*f4b3ec61Sdh155122 netstackid_t
1150*f4b3ec61Sdh155122 zoneid_to_netstackid(zoneid_t zoneid)
1151*f4b3ec61Sdh155122 {
1152*f4b3ec61Sdh155122 	if (netstack_find_shared_zoneid(zoneid))
1153*f4b3ec61Sdh155122 		return (GLOBAL_ZONEID);
1154*f4b3ec61Sdh155122 	else
1155*f4b3ec61Sdh155122 		return (zoneid);
1156*f4b3ec61Sdh155122 }
1157*f4b3ec61Sdh155122 
1158*f4b3ec61Sdh155122 /*
1159*f4b3ec61Sdh155122  * Simplistic support for walking all the handles.
1160*f4b3ec61Sdh155122  * Example usage:
1161*f4b3ec61Sdh155122  *	netstack_handle_t nh;
1162*f4b3ec61Sdh155122  *	netstack_t *ns;
1163*f4b3ec61Sdh155122  *
1164*f4b3ec61Sdh155122  *	netstack_next_init(&nh);
1165*f4b3ec61Sdh155122  *	while ((ns = netstack_next(&nh)) != NULL) {
1166*f4b3ec61Sdh155122  *		do something;
1167*f4b3ec61Sdh155122  *		netstack_rele(ns);
1168*f4b3ec61Sdh155122  *	}
1169*f4b3ec61Sdh155122  *	netstack_next_fini(&nh);
1170*f4b3ec61Sdh155122  */
/*
 * Begin a walk over all netstacks; see the usage example above.
 */
void
netstack_next_init(netstack_handle_t *handle)
{
	*handle = 0;	/* start at the head of the netstack list */
}
1176*f4b3ec61Sdh155122 
/* ARGSUSED */
void
netstack_next_fini(netstack_handle_t *handle)
{
	/* Nothing to release; the handle holds no resources */
}
1182*f4b3ec61Sdh155122 
/*
 * Return the next netstack in the walk, or NULL when the walk is done.
 * The handle counts how many list positions have been consumed so far;
 * stacks that are skipped because they are uninitialized or closing are
 * counted as well, so the walk always makes forward progress.  A hold is
 * taken on the returned netstack; the caller must netstack_rele() it.
 */
netstack_t *
netstack_next(netstack_handle_t *handle)
{
	netstack_t *ns;
	int i, end;

	end = *handle;
	/* Walk skipping *handle number of instances */

	/* Look if there is a matching stack instance */
	mutex_enter(&netstack_g_lock);
	ns = netstack_head;
	for (i = 0; i < end; i++) {
		if (ns == NULL)
			break;
		ns = ns->netstack_next;
	}
	/* Skip those that aren't really here - uninitialized or closing */
	while (ns != NULL) {
		mutex_enter(&ns->netstack_lock);
		if ((ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) == 0) {
			mutex_exit(&ns->netstack_lock);
			break;
		}
		mutex_exit(&ns->netstack_lock);
		end++;
		ns = ns->netstack_next;
	}
	if (ns != NULL) {
		/* Record the position past this entry for the next call */
		*handle = end + 1;
		netstack_hold(ns);
	}
	mutex_exit(&netstack_g_lock);
	return (ns);
}
1218