xref: /illumos-gate/usr/src/uts/common/os/netstack.c (revision 24472db64c485d6744c0321b7581cf066556cf2d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/param.h>
30 #include <sys/sysmacros.h>
31 #include <sys/vm.h>
32 #include <sys/proc.h>
33 #include <sys/tuneable.h>
34 #include <sys/systm.h>
35 #include <sys/cmn_err.h>
36 #include <sys/debug.h>
37 #include <sys/sdt.h>
38 #include <sys/mutex.h>
39 #include <sys/bitmap.h>
40 #include <sys/atomic.h>
41 #include <sys/kobj.h>
42 #include <sys/disp.h>
43 #include <vm/seg_kmem.h>
44 #include <sys/zone.h>
45 #include <sys/netstack.h>
46 
47 /*
48  * What we use so that the zones framework can tell us about new zones,
49  * which we use to create new stacks.
50  */
static zone_key_t netstack_zone_key;

/*
 * Set to 1 by netstack_init(). netstack_register() and
 * netstack_zone_create() ASSERT that it is set before doing any work.
 */
static int	netstack_initialized = 0;
54 
/*
 * Track the registered netstacks.
 * The global lock protects
 * - ns_reg
 * - the list starting at netstack_head and following the netstack_next
 *   pointers.
 */
static kmutex_t netstack_g_lock;

/*
 * Registry of netstacks with their create/shutdown/destroy functions.
 * Indexed by moduleid (0 <= moduleid < NS_MAX); an entry is in use when
 * NRF_REGISTERED is set in its nr_flags.
 */
static struct netstack_registry	ns_reg[NS_MAX];
68 
69 /*
70  * Global list of existing stacks.  We use this when a new zone with
71  * an exclusive IP instance is created.
72  *
73  * Note that in some cases a netstack_t needs to stay around after the zone
74  * has gone away. This is because there might be outstanding references
75  * (from TCP TIME_WAIT connections, IPsec state, etc). The netstack_t data
76  * structure and all the foo_stack_t's hanging off of it will be cleaned up
77  * when the last reference to it is dropped.
78  * However, the same zone might be rebooted. That is handled using the
79  * assumption that the zones framework picks a new zoneid each time a zone
80  * is (re)booted. We assert for that condition in netstack_zone_create().
81  * Thus the old netstack_t can take its time for things to time out.
82  */
static netstack_t *netstack_head;	/* protected by netstack_g_lock */
84 
85 /*
86  * To support kstat_create_netstack() using kstat_zone_add we need
87  * to track both
88  *  - all zoneids that use the global/shared stack
89  *  - all kstats that have been added for the shared stack
90  */
/* One entry per zone that uses the global/shared stack. */
struct shared_zone_list {
	struct shared_zone_list *sz_next;	/* next entry, NULL at end */
	zoneid_t		sz_zoneid;	/* zone using the shared stack */
};
95 
/* One entry per kstat that has been added for the shared stack. */
struct shared_kstat_list {
	struct shared_kstat_list *sk_next;	/* next entry, NULL at end */
	kstat_t			 *sk_kstat;	/* kstat of the shared stack */
};
100 
static kmutex_t netstack_shared_lock;	/* protects the following two */
/* Head of the list of zones using the shared stack */
static struct shared_zone_list	*netstack_shared_zones;
/* Head of the list of kstats added for the shared stack */
static struct shared_kstat_list	*netstack_shared_kstats;
104 
/* Callbacks handed to the zones framework via zone_key_create() */
static void	*netstack_zone_create(zoneid_t zoneid);
static void	netstack_zone_shutdown(zoneid_t zoneid, void *arg);
static void	netstack_zone_destroy(zoneid_t zoneid, void *arg);

/*
 * Run the pending create/shutdown/destroy callbacks, either for every
 * module of one stack (ns != NULL, moduleid == NS_ALL) or for one module
 * on every stack (ns == NULL).
 */
static void	netstack_do_create(netstack_t *ns, int moduleid);
static void	netstack_do_shutdown(netstack_t *ns, int moduleid);
static void	netstack_do_destroy(netstack_t *ns, int moduleid);

/* Bookkeeping for zones and kstats that belong to the shared stack */
static void	netstack_shared_zone_add(zoneid_t zoneid);
static void	netstack_shared_zone_remove(zoneid_t zoneid);
static void	netstack_shared_kstat_add(kstat_t *ks);
static void	netstack_shared_kstat_remove(kstat_t *ks);

/*
 * Work function applied to a (netstack, moduleid) pair. It is called with
 * the lock passed as first argument held and returns B_TRUE if it dropped
 * that lock while doing work, B_FALSE if it did nothing.
 */
typedef boolean_t applyfn_t(kmutex_t *, netstack_t *, int);
119 
120 void
121 netstack_init(void)
122 {
123 	mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
124 	mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);
125 
126 	netstack_initialized = 1;
127 
128 	/*
129 	 * We want to be informed each time a zone is created or
130 	 * destroyed in the kernel, so we can maintain the
131 	 * stack instance information.
132 	 */
133 	zone_key_create(&netstack_zone_key, netstack_zone_create,
134 	    netstack_zone_shutdown, netstack_zone_destroy);
135 }
136 
137 /*
138  * Register a new module with the framework.
139  * This registers interest in changes to the set of netstacks.
140  * The createfn and destroyfn are required, but the shutdownfn can be
141  * NULL.
142  * Note that due to the current zsd implementation, when the create
143  * function is called the zone isn't fully present, thus functions
144  * like zone_find_by_* will fail, hence the create function can not
145  * use many zones kernel functions including zcmn_err().
146  */
147 void
148 netstack_register(int moduleid,
149     void *(*module_create)(netstackid_t, netstack_t *),
150     void (*module_shutdown)(netstackid_t, void *),
151     void (*module_destroy)(netstackid_t, void *))
152 {
153 	netstack_t *ns;
154 
155 	ASSERT(netstack_initialized);
156 	ASSERT(moduleid >= 0 && moduleid < NS_MAX);
157 	ASSERT(module_create != NULL);
158 
159 	mutex_enter(&netstack_g_lock);
160 	ASSERT(ns_reg[moduleid].nr_create == NULL);
161 	ASSERT(ns_reg[moduleid].nr_flags == 0);
162 	ns_reg[moduleid].nr_create = module_create;
163 	ns_reg[moduleid].nr_shutdown = module_shutdown;
164 	ns_reg[moduleid].nr_destroy = module_destroy;
165 	ns_reg[moduleid].nr_flags = NRF_REGISTERED;
166 
167 	/*
168 	 * Determine the set of stacks that exist before we drop the lock.
169 	 * Set CREATE_NEEDED for each of those.
170 	 * netstacks which have been deleted will have NSS_CREATE_COMPLETED
171 	 * set, but check NSF_CLOSING to be sure.
172 	 */
173 	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
174 		mutex_enter(&ns->netstack_lock);
175 		if (!(ns->netstack_flags & NSF_CLOSING) &&
176 		    (ns->netstack_m_state[moduleid] & NSS_CREATE_ALL) == 0) {
177 			ns->netstack_m_state[moduleid] |= NSS_CREATE_NEEDED;
178 			DTRACE_PROBE2(netstack__create__needed,
179 			    netstack_t *, ns, int, moduleid);
180 		}
181 		mutex_exit(&ns->netstack_lock);
182 	}
183 	mutex_exit(&netstack_g_lock);
184 
185 	/*
186 	 * Call the create function for each stack that has CREATE_NEEDED
187 	 * for this moduleid.
188 	 * Set CREATE_INPROGRESS, drop lock, and after done,
189 	 * set CREATE_COMPLETE
190 	 */
191 	netstack_do_create(NULL, moduleid);
192 }
193 
194 void
195 netstack_unregister(int moduleid)
196 {
197 	netstack_t *ns;
198 
199 	ASSERT(moduleid >= 0 && moduleid < NS_MAX);
200 
201 	ASSERT(ns_reg[moduleid].nr_create != NULL);
202 	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
203 
204 	mutex_enter(&netstack_g_lock);
205 	/*
206 	 * Determine the set of stacks that exist before we drop the lock.
207 	 * Set SHUTDOWN_NEEDED and DESTROY_NEEDED for each of those.
208 	 */
209 	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
210 		mutex_enter(&ns->netstack_lock);
211 		if (ns_reg[moduleid].nr_shutdown != NULL &&
212 		    (ns->netstack_m_state[moduleid] & NSS_CREATE_COMPLETED) &&
213 		    (ns->netstack_m_state[moduleid] & NSS_SHUTDOWN_ALL) == 0) {
214 			ns->netstack_m_state[moduleid] |= NSS_SHUTDOWN_NEEDED;
215 			DTRACE_PROBE2(netstack__shutdown__needed,
216 			    netstack_t *, ns, int, moduleid);
217 		}
218 		if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) &&
219 		    ns_reg[moduleid].nr_destroy != NULL &&
220 		    (ns->netstack_m_state[moduleid] & NSS_CREATE_COMPLETED) &&
221 		    (ns->netstack_m_state[moduleid] & NSS_DESTROY_ALL) == 0) {
222 			ns->netstack_m_state[moduleid] |= NSS_DESTROY_NEEDED;
223 			DTRACE_PROBE2(netstack__destroy__needed,
224 			    netstack_t *, ns, int, moduleid);
225 		}
226 		mutex_exit(&ns->netstack_lock);
227 	}
228 	mutex_exit(&netstack_g_lock);
229 
230 	netstack_do_shutdown(NULL, moduleid);
231 	netstack_do_destroy(NULL, moduleid);
232 
233 	/*
234 	 * Clear the netstack_m_state so that we can handle this module
235 	 * being loaded again.
236 	 */
237 	mutex_enter(&netstack_g_lock);
238 	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
239 		mutex_enter(&ns->netstack_lock);
240 		if (ns->netstack_m_state[moduleid] & NSS_DESTROY_COMPLETED) {
241 			ns->netstack_m_state[moduleid] = 0;
242 			DTRACE_PROBE2(netstack__destroy__done,
243 			    netstack_t *, ns, int, moduleid);
244 		}
245 		mutex_exit(&ns->netstack_lock);
246 	}
247 
248 	ns_reg[moduleid].nr_create = NULL;
249 	ns_reg[moduleid].nr_shutdown = NULL;
250 	ns_reg[moduleid].nr_destroy = NULL;
251 	ns_reg[moduleid].nr_flags = 0;
252 	mutex_exit(&netstack_g_lock);
253 }
254 
255 /*
256  * Lookup and/or allocate a netstack for this zone.
257  */
258 static void *
259 netstack_zone_create(zoneid_t zoneid)
260 {
261 	netstackid_t stackid;
262 	netstack_t *ns;
263 	netstack_t **nsp;
264 	zone_t	*zone;
265 	int i;
266 
267 	ASSERT(netstack_initialized);
268 
269 	zone = zone_find_by_id_nolock(zoneid);
270 	ASSERT(zone != NULL);
271 
272 	if (zone->zone_flags & ZF_NET_EXCL) {
273 		stackid = zoneid;
274 	} else {
275 		/* Look for the stack instance for the global */
276 		stackid = GLOBAL_NETSTACKID;
277 	}
278 
279 	/* Allocate even if it isn't needed; simplifies locking */
280 	ns = (netstack_t *)kmem_zalloc(sizeof (netstack_t), KM_SLEEP);
281 
282 	/* Look if there is a matching stack instance */
283 	mutex_enter(&netstack_g_lock);
284 	for (nsp = &netstack_head; *nsp != NULL;
285 	    nsp = &((*nsp)->netstack_next)) {
286 		if ((*nsp)->netstack_stackid == stackid) {
287 			/*
288 			 * Should never find a pre-existing exclusive stack
289 			 */
290 			ASSERT(stackid == GLOBAL_NETSTACKID);
291 			kmem_free(ns, sizeof (netstack_t));
292 			ns = *nsp;
293 			mutex_enter(&ns->netstack_lock);
294 			ns->netstack_numzones++;
295 			mutex_exit(&ns->netstack_lock);
296 			mutex_exit(&netstack_g_lock);
297 			DTRACE_PROBE1(netstack__inc__numzones,
298 			    netstack_t *, ns);
299 			/* Record that we have a new shared stack zone */
300 			netstack_shared_zone_add(zoneid);
301 			zone->zone_netstack = ns;
302 			return (ns);
303 		}
304 	}
305 	/* Not found */
306 	mutex_init(&ns->netstack_lock, NULL, MUTEX_DEFAULT, NULL);
307 	ns->netstack_stackid = zoneid;
308 	ns->netstack_numzones = 1;
309 	ns->netstack_refcnt = 1; /* Decremented by netstack_zone_destroy */
310 	ns->netstack_flags = NSF_UNINIT;
311 	*nsp = ns;
312 	zone->zone_netstack = ns;
313 
314 	/*
315 	 * Determine the set of module create functions that need to be
316 	 * called before we drop the lock.
317 	 */
318 	for (i = 0; i < NS_MAX; i++) {
319 		mutex_enter(&ns->netstack_lock);
320 		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
321 		    (ns->netstack_m_state[i] & NSS_CREATE_ALL) == 0) {
322 			ns->netstack_m_state[i] |= NSS_CREATE_NEEDED;
323 			DTRACE_PROBE2(netstack__create__needed,
324 			    netstack_t *, ns, int, i);
325 		}
326 		mutex_exit(&ns->netstack_lock);
327 	}
328 	mutex_exit(&netstack_g_lock);
329 
330 	netstack_do_create(ns, NS_ALL);
331 
332 	mutex_enter(&ns->netstack_lock);
333 	ns->netstack_flags &= ~NSF_UNINIT;
334 	mutex_exit(&ns->netstack_lock);
335 
336 	return (ns);
337 }
338 
339 /* ARGSUSED */
340 static void
341 netstack_zone_shutdown(zoneid_t zoneid, void *arg)
342 {
343 	netstack_t *ns = (netstack_t *)arg;
344 	int i;
345 
346 	ASSERT(arg != NULL);
347 
348 	mutex_enter(&ns->netstack_lock);
349 	ASSERT(ns->netstack_numzones > 0);
350 	if (ns->netstack_numzones != 1) {
351 		/* Stack instance being used by other zone */
352 		mutex_exit(&ns->netstack_lock);
353 		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
354 		return;
355 	}
356 	mutex_exit(&ns->netstack_lock);
357 
358 	mutex_enter(&netstack_g_lock);
359 	/*
360 	 * Determine the set of stacks that exist before we drop the lock.
361 	 * Set SHUTDOWN_NEEDED for each of those.
362 	 */
363 	for (i = 0; i < NS_MAX; i++) {
364 		mutex_enter(&ns->netstack_lock);
365 		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
366 		    ns_reg[i].nr_shutdown != NULL &&
367 		    (ns->netstack_m_state[i] & NSS_CREATE_COMPLETED) &&
368 		    (ns->netstack_m_state[i] & NSS_SHUTDOWN_ALL) == 0) {
369 			ns->netstack_m_state[i] |= NSS_SHUTDOWN_NEEDED;
370 			DTRACE_PROBE2(netstack__shutdown__needed,
371 			    netstack_t *, ns, int, i);
372 		}
373 		mutex_exit(&ns->netstack_lock);
374 	}
375 	mutex_exit(&netstack_g_lock);
376 
377 	/*
378 	 * Call the shutdown function for all registered modules for this
379 	 * netstack.
380 	 */
381 	netstack_do_shutdown(ns, NS_ALL);
382 }
383 
384 /*
385  * Common routine to release a zone.
386  * If this was the last zone using the stack instance then prepare to
387  * have the refcnt dropping to zero free the zone.
388  */
389 /* ARGSUSED */
390 static void
391 netstack_zone_destroy(zoneid_t zoneid, void *arg)
392 {
393 	netstack_t *ns = (netstack_t *)arg;
394 
395 	ASSERT(arg != NULL);
396 
397 	mutex_enter(&ns->netstack_lock);
398 	ASSERT(ns->netstack_numzones > 0);
399 	ns->netstack_numzones--;
400 	if (ns->netstack_numzones != 0) {
401 		/* Stack instance being used by other zone */
402 		mutex_exit(&ns->netstack_lock);
403 		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
404 		/* Record that we a shared stack zone has gone away */
405 		netstack_shared_zone_remove(zoneid);
406 		return;
407 	}
408 	/*
409 	 * Set CLOSING so that netstack_find_by will not find it.
410 	 */
411 	ns->netstack_flags |= NSF_CLOSING;
412 	mutex_exit(&ns->netstack_lock);
413 	DTRACE_PROBE1(netstack__dec__numzones, netstack_t *, ns);
414 	/* No other thread can call zone_destroy for this stack */
415 
416 	/*
417 	 * Decrease refcnt to account for the one in netstack_zone_init()
418 	 */
419 	netstack_rele(ns);
420 }
421 
422 /*
423  * Called when the reference count drops to zero.
424  * Call the destroy functions for each registered module.
425  */
426 static void
427 netstack_stack_inactive(netstack_t *ns)
428 {
429 	int i;
430 
431 	mutex_enter(&netstack_g_lock);
432 	/*
433 	 * If the shutdown callback wasn't called earlier (e.g., if this is
434 	 * a netstack shared between multiple zones), then we call it now.
435 	 */
436 	for (i = 0; i < NS_MAX; i++) {
437 		mutex_enter(&ns->netstack_lock);
438 		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
439 		    ns_reg[i].nr_shutdown != NULL &&
440 		    (ns->netstack_m_state[i] & NSS_CREATE_COMPLETED) &&
441 		    (ns->netstack_m_state[i] & NSS_SHUTDOWN_ALL) == 0) {
442 			ns->netstack_m_state[i] |= NSS_SHUTDOWN_NEEDED;
443 			DTRACE_PROBE2(netstack__shutdown__needed,
444 			    netstack_t *, ns, int, i);
445 		}
446 		mutex_exit(&ns->netstack_lock);
447 	}
448 	/*
449 	 * Determine the set of stacks that exist before we drop the lock.
450 	 * Set DESTROY_NEEDED for each of those.
451 	 */
452 	for (i = 0; i < NS_MAX; i++) {
453 		mutex_enter(&ns->netstack_lock);
454 		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
455 		    ns_reg[i].nr_destroy != NULL &&
456 		    (ns->netstack_m_state[i] & NSS_CREATE_COMPLETED) &&
457 		    (ns->netstack_m_state[i] & NSS_DESTROY_ALL) == 0) {
458 			ns->netstack_m_state[i] |= NSS_DESTROY_NEEDED;
459 			DTRACE_PROBE2(netstack__destroy__needed,
460 			    netstack_t *, ns, int, i);
461 		}
462 		mutex_exit(&ns->netstack_lock);
463 	}
464 	mutex_exit(&netstack_g_lock);
465 
466 	/*
467 	 * Call the shutdown and destroy functions for all registered modules
468 	 * for this netstack.
469 	 */
470 	netstack_do_shutdown(ns, NS_ALL);
471 	netstack_do_destroy(ns, NS_ALL);
472 }
473 
474 /*
475  * Call the create function for the ns and moduleid if CREATE_NEEDED
476  * is set.
477  * When it calls it, it drops the netstack_lock held by the caller,
478  * and returns true to tell the caller it needs to re-evalute the
479  * state..
480  */
481 static boolean_t
482 netstack_apply_create(kmutex_t *lockp, netstack_t *ns, int moduleid)
483 {
484 	void *result;
485 	netstackid_t stackid;
486 
487 	ASSERT(MUTEX_HELD(lockp));
488 	mutex_enter(&ns->netstack_lock);
489 	if (ns->netstack_m_state[moduleid] & NSS_CREATE_NEEDED) {
490 		ns->netstack_m_state[moduleid] &= ~NSS_CREATE_NEEDED;
491 		ns->netstack_m_state[moduleid] |= NSS_CREATE_INPROGRESS;
492 		DTRACE_PROBE2(netstack__create__inprogress,
493 		    netstack_t *, ns, int, moduleid);
494 		mutex_exit(&ns->netstack_lock);
495 		mutex_exit(lockp);
496 
497 		ASSERT(ns_reg[moduleid].nr_create != NULL);
498 		stackid = ns->netstack_stackid;
499 		DTRACE_PROBE2(netstack__create__start,
500 		    netstackid_t, stackid,
501 		    netstack_t *, ns);
502 		result = (ns_reg[moduleid].nr_create)(stackid, ns);
503 		DTRACE_PROBE2(netstack__create__end,
504 		    void *, result, netstack_t *, ns);
505 
506 		ASSERT(result != NULL);
507 		mutex_enter(&ns->netstack_lock);
508 		ns->netstack_modules[moduleid] = result;
509 		ns->netstack_m_state[moduleid] &= ~NSS_CREATE_INPROGRESS;
510 		ns->netstack_m_state[moduleid] |= NSS_CREATE_COMPLETED;
511 		DTRACE_PROBE2(netstack__create__completed,
512 		    netstack_t *, ns, int, moduleid);
513 		mutex_exit(&ns->netstack_lock);
514 		return (B_TRUE);
515 	} else {
516 		mutex_exit(&ns->netstack_lock);
517 		return (B_FALSE);
518 	}
519 }
520 
521 /*
522  * Call the shutdown function for the ns and moduleid if SHUTDOWN_NEEDED
523  * is set.
524  * When it calls it, it drops the netstack_lock held by the caller,
525  * and returns true to tell the caller it needs to re-evalute the
526  * state..
527  */
528 static boolean_t
529 netstack_apply_shutdown(kmutex_t *lockp, netstack_t *ns, int moduleid)
530 {
531 	netstackid_t stackid;
532 	void * netstack_module;
533 
534 	ASSERT(MUTEX_HELD(lockp));
535 	mutex_enter(&ns->netstack_lock);
536 	if (ns->netstack_m_state[moduleid] & NSS_SHUTDOWN_NEEDED) {
537 		ns->netstack_m_state[moduleid] &= ~NSS_SHUTDOWN_NEEDED;
538 		ns->netstack_m_state[moduleid] |= NSS_SHUTDOWN_INPROGRESS;
539 		DTRACE_PROBE2(netstack__shutdown__inprogress,
540 		    netstack_t *, ns, int, moduleid);
541 		mutex_exit(&ns->netstack_lock);
542 		mutex_exit(lockp);
543 
544 		ASSERT(ns_reg[moduleid].nr_shutdown != NULL);
545 		stackid = ns->netstack_stackid;
546 		netstack_module = ns->netstack_modules[moduleid];
547 		DTRACE_PROBE2(netstack__shutdown__start,
548 		    netstackid_t, stackid,
549 		    void *, netstack_module);
550 		(ns_reg[moduleid].nr_shutdown)(stackid, netstack_module);
551 		DTRACE_PROBE1(netstack__shutdown__end,
552 		    netstack_t *, ns);
553 
554 		mutex_enter(&ns->netstack_lock);
555 		ns->netstack_m_state[moduleid] &= ~NSS_SHUTDOWN_INPROGRESS;
556 		ns->netstack_m_state[moduleid] |= NSS_SHUTDOWN_COMPLETED;
557 		DTRACE_PROBE2(netstack__shutdown__completed,
558 		    netstack_t *, ns, int, moduleid);
559 		mutex_exit(&ns->netstack_lock);
560 		return (B_TRUE);
561 	} else {
562 		mutex_exit(&ns->netstack_lock);
563 		return (B_FALSE);
564 	}
565 }
566 
567 /*
568  * Call the destroy function for the ns and moduleid if DESTROY_NEEDED
569  * is set.
570  * When it calls it, it drops the netstack_lock held by the caller,
571  * and returns true to tell the caller it needs to re-evalute the
572  * state..
573  */
574 static boolean_t
575 netstack_apply_destroy(kmutex_t *lockp, netstack_t *ns, int moduleid)
576 {
577 	netstackid_t stackid;
578 	void * netstack_module;
579 
580 	ASSERT(MUTEX_HELD(lockp));
581 	mutex_enter(&ns->netstack_lock);
582 	if (ns->netstack_m_state[moduleid] & NSS_DESTROY_NEEDED) {
583 		ns->netstack_m_state[moduleid] &= ~NSS_DESTROY_NEEDED;
584 		ns->netstack_m_state[moduleid] |= NSS_DESTROY_INPROGRESS;
585 		DTRACE_PROBE2(netstack__destroy__inprogress,
586 		    netstack_t *, ns, int, moduleid);
587 		mutex_exit(&ns->netstack_lock);
588 		mutex_exit(lockp);
589 
590 		/* XXX race against unregister? */
591 		ASSERT(ns_reg[moduleid].nr_destroy != NULL);
592 		stackid = ns->netstack_stackid;
593 		netstack_module = ns->netstack_modules[moduleid];
594 		DTRACE_PROBE2(netstack__destroy__start,
595 		    netstackid_t, stackid,
596 		    void *, netstack_module);
597 		(ns_reg[moduleid].nr_destroy)(stackid, netstack_module);
598 		DTRACE_PROBE1(netstack__destroy__end,
599 		    netstack_t *, ns);
600 
601 		mutex_enter(&ns->netstack_lock);
602 		ns->netstack_modules[moduleid] = NULL;
603 		ns->netstack_m_state[moduleid] &= ~NSS_DESTROY_INPROGRESS;
604 		ns->netstack_m_state[moduleid] |= NSS_DESTROY_COMPLETED;
605 		DTRACE_PROBE2(netstack__destroy__completed,
606 		    netstack_t *, ns, int, moduleid);
607 		mutex_exit(&ns->netstack_lock);
608 		return (B_TRUE);
609 	} else {
610 		mutex_exit(&ns->netstack_lock);
611 		return (B_FALSE);
612 	}
613 }
614 
615 /*
616  * Apply a function to all netstacks for a particular moduleid.
617  *
618  * The applyfn has to drop netstack_g_lock if it does some work.
619  * In that case we don't follow netstack_next after reacquiring the
620  * lock, even if it is possible to do so without any hazards. This is
621  * because we want the design to allow for the list of netstacks threaded
622  * by netstack_next to change in any arbitrary way during the time the
623  * lock was dropped.
624  *
625  * It is safe to restart the loop at netstack_head since the applyfn
626  * changes netstack_m_state as it processes things, so a subsequent
627  * pass through will have no effect in applyfn, hence the loop will terminate
628  * in at worst O(N^2).
629  */
630 static void
631 apply_all_netstacks(int moduleid, applyfn_t *applyfn)
632 {
633 	netstack_t *ns;
634 
635 	mutex_enter(&netstack_g_lock);
636 	ns = netstack_head;
637 	while (ns != NULL) {
638 		if ((applyfn)(&netstack_g_lock, ns, moduleid)) {
639 			/* Lock dropped - restart at head */
640 #ifdef NS_DEBUG
641 			(void) printf("apply_all_netstacks: "
642 			    "LD for %p/%d, %d\n",
643 			    (void *)ns, ns->netstack_stackid, moduleid);
644 #endif
645 			mutex_enter(&netstack_g_lock);
646 			ns = netstack_head;
647 		} else {
648 			ns = ns->netstack_next;
649 		}
650 	}
651 	mutex_exit(&netstack_g_lock);
652 }
653 
654 /*
655  * Apply a function to all moduleids for a particular netstack.
656  *
657  * Since the netstack linkage doesn't matter in this case we can
658  * ignore whether the function drops the lock.
659  */
660 static void
661 apply_all_modules(netstack_t *ns, applyfn_t *applyfn)
662 {
663 	int i;
664 
665 	mutex_enter(&netstack_g_lock);
666 	for (i = 0; i < NS_MAX; i++) {
667 		if ((applyfn)(&netstack_g_lock, ns, i)) {
668 			/*
669 			 * Lock dropped but since we are not iterating over
670 			 * netstack_head we can just reacquire the lock.
671 			 */
672 			mutex_enter(&netstack_g_lock);
673 		}
674 	}
675 	mutex_exit(&netstack_g_lock);
676 }
677 
678 /* Like the above but in reverse moduleid order */
679 static void
680 apply_all_modules_reverse(netstack_t *ns, applyfn_t *applyfn)
681 {
682 	int i;
683 
684 	mutex_enter(&netstack_g_lock);
685 	for (i = NS_MAX-1; i >= 0; i--) {
686 		if ((applyfn)(&netstack_g_lock, ns, i)) {
687 			/*
688 			 * Lock dropped but since we are not iterating over
689 			 * netstack_head we can just reacquire the lock.
690 			 */
691 			mutex_enter(&netstack_g_lock);
692 		}
693 	}
694 	mutex_exit(&netstack_g_lock);
695 }
696 
697 /*
698  * Apply a function to a subset of all module/netstack combinations.
699  *
700  * If ns is non-NULL we restrict it to that particular instance.
701  * If moduleid is a particular one (not NS_ALL), then we restrict it
702  * to that particular moduleid.
703  * When walking the moduleid, the reverse argument specifies that they
704  * should be walked in reverse order.
705  * The applyfn returns true if it had dropped the locks.
706  */
707 static void
708 netstack_do_apply(netstack_t *ns, int moduleid, boolean_t reverse,
709     applyfn_t *applyfn)
710 {
711 	if (ns != NULL) {
712 		ASSERT(moduleid == NS_ALL);
713 		if (reverse)
714 			apply_all_modules_reverse(ns, applyfn);
715 		else
716 			apply_all_modules(ns, applyfn);
717 	} else {
718 		ASSERT(moduleid != NS_ALL);
719 
720 		apply_all_netstacks(moduleid, applyfn);
721 	}
722 }
723 
724 /*
725  * Run the create function for all modules x stack combinations
726  * that have NSS_CREATE_NEEDED set.
727  *
728  * Call the create function for each stack that has CREATE_NEEDED.
729  * Set CREATE_INPROGRESS, drop lock, and after done,
730  * set CREATE_COMPLETE
731  */
732 static void
733 netstack_do_create(netstack_t *ns, int moduleid)
734 {
735 	netstack_do_apply(ns, moduleid, B_FALSE, netstack_apply_create);
736 }
737 
738 /*
739  * Run the shutdown function for all modules x stack combinations
740  * that have NSS_SHUTDOWN_NEEDED set.
741  *
742  * Call the shutdown function for each stack that has SHUTDOWN_NEEDED.
743  * Set SHUTDOWN_INPROGRESS, drop lock, and after done,
744  * set SHUTDOWN_COMPLETE
745  */
746 static void
747 netstack_do_shutdown(netstack_t *ns, int moduleid)
748 {
749 	netstack_do_apply(ns, moduleid, B_FALSE, netstack_apply_shutdown);
750 }
751 
752 /*
753  * Run the destroy function for all modules x stack combinations
754  * that have NSS_DESTROY_NEEDED set.
755  *
756  * Call the destroy function for each stack that has DESTROY_NEEDED.
757  * Set DESTROY_INPROGRESS, drop lock, and after done,
758  * set DESTROY_COMPLETE
759  *
760  * Since a netstack_t is never reused (when a zone is rebooted it gets
761  * a new zoneid == netstackid i.e. a new netstack_t is allocated) we leave
762  * netstack_m_state the way it is i.e. with NSS_DESTROY_COMPLETED set.
763  */
764 static void
765 netstack_do_destroy(netstack_t *ns, int moduleid)
766 {
767 	/*
768 	 * Have to walk the moduleids in reverse order since some
769 	 * modules make implicit assumptions about the order
770 	 */
771 	netstack_do_apply(ns, moduleid, B_TRUE, netstack_apply_destroy);
772 }
773 
774 /*
775  * Get the stack instance used in caller's zone.
776  * Increases the reference count, caller must do a netstack_rele.
777  * It can't be called after zone_destroy() has started.
778  */
779 netstack_t *
780 netstack_get_current(void)
781 {
782 	netstack_t *ns;
783 
784 	ns = curproc->p_zone->zone_netstack;
785 	ASSERT(ns != NULL);
786 	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
787 		return (NULL);
788 
789 	netstack_hold(ns);
790 
791 	return (ns);
792 }
793 
794 /*
795  * Find a stack instance given the cred.
796  * This is used by the modules to potentially allow for a future when
797  * something other than the zoneid is used to determine the stack.
798  */
799 netstack_t *
800 netstack_find_by_cred(const cred_t *cr)
801 {
802 	zoneid_t zoneid = crgetzoneid(cr);
803 
804 	/* Handle the case when cr_zone is NULL */
805 	if (zoneid == (zoneid_t)-1)
806 		zoneid = GLOBAL_ZONEID;
807 
808 	/* For performance ... */
809 	if (curproc->p_zone->zone_id == zoneid)
810 		return (netstack_get_current());
811 	else
812 		return (netstack_find_by_zoneid(zoneid));
813 }
814 
815 /*
816  * Find a stack instance given the zoneid.
817  * Increases the reference count if found; caller must do a
818  * netstack_rele().
819  *
820  * If there is no exact match then assume the shared stack instance
821  * matches.
822  *
823  * Skip the unitialized ones.
824  */
825 netstack_t *
826 netstack_find_by_zoneid(zoneid_t zoneid)
827 {
828 	netstack_t *ns;
829 	zone_t *zone;
830 
831 	zone = zone_find_by_id(zoneid);
832 
833 	if (zone == NULL)
834 		return (NULL);
835 
836 	ns = zone->zone_netstack;
837 	ASSERT(ns != NULL);
838 	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
839 		ns = NULL;
840 	else
841 		netstack_hold(ns);
842 
843 	zone_rele(zone);
844 	return (ns);
845 }
846 
847 /*
848  * Find a stack instance given the zoneid.
849  * Increases the reference count if found; caller must do a
850  * netstack_rele().
851  *
852  * If there is no exact match then assume the shared stack instance
853  * matches.
854  *
855  * Skip the unitialized ones.
856  *
857  * NOTE: The caller must hold zonehash_lock.
858  */
859 netstack_t *
860 netstack_find_by_zoneid_nolock(zoneid_t zoneid)
861 {
862 	netstack_t *ns;
863 	zone_t *zone;
864 
865 	zone = zone_find_by_id_nolock(zoneid);
866 
867 	if (zone == NULL)
868 		return (NULL);
869 
870 	ns = zone->zone_netstack;
871 	ASSERT(ns != NULL);
872 
873 	if (ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))
874 		ns = NULL;
875 	else
876 		netstack_hold(ns);
877 
878 	zone_rele(zone);
879 	return (ns);
880 }
881 
882 /*
883  * Find a stack instance given the stackid with exact match?
884  * Increases the reference count if found; caller must do a
885  * netstack_rele().
886  *
887  * Skip the unitialized ones.
888  */
889 netstack_t *
890 netstack_find_by_stackid(netstackid_t stackid)
891 {
892 	netstack_t *ns;
893 
894 	mutex_enter(&netstack_g_lock);
895 	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
896 		mutex_enter(&ns->netstack_lock);
897 		if (ns->netstack_stackid == stackid &&
898 		    !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
899 			mutex_exit(&ns->netstack_lock);
900 			netstack_hold(ns);
901 			mutex_exit(&netstack_g_lock);
902 			return (ns);
903 		}
904 		mutex_exit(&ns->netstack_lock);
905 	}
906 	mutex_exit(&netstack_g_lock);
907 	return (NULL);
908 }
909 
910 void
911 netstack_rele(netstack_t *ns)
912 {
913 	netstack_t **nsp;
914 	boolean_t found;
915 	int refcnt, numzones;
916 
917 	mutex_enter(&ns->netstack_lock);
918 	ASSERT(ns->netstack_refcnt > 0);
919 	ns->netstack_refcnt--;
920 	/*
921 	 * As we drop the lock additional netstack_rele()s can come in
922 	 * and decrement the refcnt to zero and free the netstack_t.
923 	 * Store pointers in local variables and if we were not the last
924 	 * then don't reference the netstack_t after that.
925 	 */
926 	refcnt = ns->netstack_refcnt;
927 	numzones = ns->netstack_numzones;
928 	DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
929 	mutex_exit(&ns->netstack_lock);
930 
931 	if (refcnt == 0 && numzones == 0) {
932 		/*
933 		 * Time to call the destroy functions and free up
934 		 * the structure
935 		 */
936 		netstack_stack_inactive(ns);
937 
938 		/* Make sure nothing increased the references */
939 		ASSERT(ns->netstack_refcnt == 0);
940 		ASSERT(ns->netstack_numzones == 0);
941 
942 		/* Finally remove from list of netstacks */
943 		mutex_enter(&netstack_g_lock);
944 		found = B_FALSE;
945 		for (nsp = &netstack_head; *nsp != NULL;
946 		    nsp = &(*nsp)->netstack_next) {
947 			if (*nsp == ns) {
948 				*nsp = ns->netstack_next;
949 				ns->netstack_next = NULL;
950 				found = B_TRUE;
951 				break;
952 			}
953 		}
954 		ASSERT(found);
955 		mutex_exit(&netstack_g_lock);
956 
957 		/* Make sure nothing increased the references */
958 		ASSERT(ns->netstack_refcnt == 0);
959 		ASSERT(ns->netstack_numzones == 0);
960 
961 		ASSERT(ns->netstack_flags & NSF_CLOSING);
962 		kmem_free(ns, sizeof (*ns));
963 	}
964 }
965 
966 void
967 netstack_hold(netstack_t *ns)
968 {
969 	mutex_enter(&ns->netstack_lock);
970 	ns->netstack_refcnt++;
971 	ASSERT(ns->netstack_refcnt > 0);
972 	mutex_exit(&ns->netstack_lock);
973 	DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
974 }
975 
976 /*
977  * To support kstat_create_netstack() using kstat_zone_add we need
978  * to track both
979  *  - all zoneids that use the global/shared stack
980  *  - all kstats that have been added for the shared stack
981  */
982 kstat_t *
983 kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
984     char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
985     netstackid_t ks_netstackid)
986 {
987 	kstat_t *ks;
988 
989 	if (ks_netstackid == GLOBAL_NETSTACKID) {
990 		ks = kstat_create_zone(ks_module, ks_instance, ks_name,
991 		    ks_class, ks_type, ks_ndata, ks_flags, GLOBAL_ZONEID);
992 		if (ks != NULL)
993 			netstack_shared_kstat_add(ks);
994 		return (ks);
995 	} else {
996 		zoneid_t zoneid = ks_netstackid;
997 
998 		return (kstat_create_zone(ks_module, ks_instance, ks_name,
999 			ks_class, ks_type, ks_ndata, ks_flags, zoneid));
1000 	}
1001 }
1002 
1003 void
1004 kstat_delete_netstack(kstat_t *ks, netstackid_t ks_netstackid)
1005 {
1006 	if (ks_netstackid == GLOBAL_NETSTACKID) {
1007 		netstack_shared_kstat_remove(ks);
1008 	}
1009 	kstat_delete(ks);
1010 }
1011 
1012 static void
1013 netstack_shared_zone_add(zoneid_t zoneid)
1014 {
1015 	struct shared_zone_list *sz;
1016 	struct shared_kstat_list *sk;
1017 
1018 	sz = (struct shared_zone_list *)kmem_zalloc(sizeof (*sz), KM_SLEEP);
1019 	sz->sz_zoneid = zoneid;
1020 
1021 	/* Insert in list */
1022 	mutex_enter(&netstack_shared_lock);
1023 	sz->sz_next = netstack_shared_zones;
1024 	netstack_shared_zones = sz;
1025 
1026 	/*
1027 	 * Perform kstat_zone_add for each existing shared stack kstat.
1028 	 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
1029 	 */
1030 	for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
1031 		kstat_zone_add(sk->sk_kstat, zoneid);
1032 	}
1033 	mutex_exit(&netstack_shared_lock);
1034 }
1035 
1036 static void
1037 netstack_shared_zone_remove(zoneid_t zoneid)
1038 {
1039 	struct shared_zone_list **szp, *sz;
1040 	struct shared_kstat_list *sk;
1041 
1042 	/* Find in list */
1043 	mutex_enter(&netstack_shared_lock);
1044 	sz = NULL;
1045 	for (szp = &netstack_shared_zones; *szp != NULL;
1046 	    szp = &((*szp)->sz_next)) {
1047 		if ((*szp)->sz_zoneid == zoneid) {
1048 			sz = *szp;
1049 			break;
1050 		}
1051 	}
1052 	/* We must find it */
1053 	ASSERT(sz != NULL);
1054 	*szp = sz->sz_next;
1055 	sz->sz_next = NULL;
1056 
1057 	/*
1058 	 * Perform kstat_zone_remove for each existing shared stack kstat.
1059 	 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
1060 	 */
1061 	for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
1062 		kstat_zone_remove(sk->sk_kstat, zoneid);
1063 	}
1064 	mutex_exit(&netstack_shared_lock);
1065 
1066 	kmem_free(sz, sizeof (*sz));
1067 }
1068 
1069 static void
1070 netstack_shared_kstat_add(kstat_t *ks)
1071 {
1072 	struct shared_zone_list *sz;
1073 	struct shared_kstat_list *sk;
1074 
1075 	sk = (struct shared_kstat_list *)kmem_zalloc(sizeof (*sk), KM_SLEEP);
1076 	sk->sk_kstat = ks;
1077 
1078 	/* Insert in list */
1079 	mutex_enter(&netstack_shared_lock);
1080 	sk->sk_next = netstack_shared_kstats;
1081 	netstack_shared_kstats = sk;
1082 
1083 	/*
1084 	 * Perform kstat_zone_add for each existing shared stack zone.
1085 	 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
1086 	 */
1087 	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1088 		kstat_zone_add(ks, sz->sz_zoneid);
1089 	}
1090 	mutex_exit(&netstack_shared_lock);
1091 }
1092 
1093 static void
1094 netstack_shared_kstat_remove(kstat_t *ks)
1095 {
1096 	struct shared_zone_list *sz;
1097 	struct shared_kstat_list **skp, *sk;
1098 
1099 	/* Find in list */
1100 	mutex_enter(&netstack_shared_lock);
1101 	sk = NULL;
1102 	for (skp = &netstack_shared_kstats; *skp != NULL;
1103 	    skp = &((*skp)->sk_next)) {
1104 		if ((*skp)->sk_kstat == ks) {
1105 			sk = *skp;
1106 			break;
1107 		}
1108 	}
1109 	/* Must find it */
1110 	ASSERT(sk != NULL);
1111 	*skp = sk->sk_next;
1112 	sk->sk_next = NULL;
1113 
1114 	/*
1115 	 * Perform kstat_zone_remove for each existing shared stack kstat.
1116 	 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
1117 	 */
1118 	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1119 		kstat_zone_remove(ks, sz->sz_zoneid);
1120 	}
1121 	mutex_exit(&netstack_shared_lock);
1122 	kmem_free(sk, sizeof (*sk));
1123 }
1124 
1125 /*
1126  * If a zoneid is part of the shared zone, return true
1127  */
1128 static boolean_t
1129 netstack_find_shared_zoneid(zoneid_t zoneid)
1130 {
1131 	struct shared_zone_list *sz;
1132 
1133 	mutex_enter(&netstack_shared_lock);
1134 	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1135 		if (sz->sz_zoneid == zoneid) {
1136 			mutex_exit(&netstack_shared_lock);
1137 			return (B_TRUE);
1138 		}
1139 	}
1140 	mutex_exit(&netstack_shared_lock);
1141 	return (B_FALSE);
1142 }
1143 
/*
 * Hide the fact that zoneids and netstackids are allocated from
 * the same space in the current implementation.
 * XXX could add checks that the stackid/zoneids are valid...
 */

/*
 * Convert a netstackid to a zoneid.
 * The value is returned unchanged; no validity check is performed.
 */
zoneid_t
netstackid_to_zoneid(netstackid_t stackid)
{
	return (stackid);
}
1154 
1155 netstackid_t
1156 zoneid_to_netstackid(zoneid_t zoneid)
1157 {
1158 	if (netstack_find_shared_zoneid(zoneid))
1159 		return (GLOBAL_ZONEID);
1160 	else
1161 		return (zoneid);
1162 }
1163 
/*
 * Simplistic support for walking all the netstack instances.
 * Each netstack returned by netstack_next() has been held and must be
 * released by the caller.
 * Example usage:
 *	netstack_handle_t nh;
 *	netstack_t *ns;
 *
 *	netstack_next_init(&nh);
 *	while ((ns = netstack_next(&nh)) != NULL) {
 *		do something;
 *		netstack_rele(ns);
 *	}
 *	netstack_next_fini(&nh);
 */

/*
 * Begin a walk: reset the handle to zero so that the first
 * netstack_next() call starts from the head of the netstack list.
 */
void
netstack_next_init(netstack_handle_t *handle)
{
	*handle = 0;
}
1182 
/*
 * End a walk started with netstack_next_init().
 * Currently there is nothing to clean up, so this does nothing and the
 * handle is left untouched.
 */
/* ARGSUSED */
void
netstack_next_fini(netstack_handle_t *handle)
{
}
1188 
1189 netstack_t *
1190 netstack_next(netstack_handle_t *handle)
1191 {
1192 	netstack_t *ns;
1193 	int i, end;
1194 
1195 	end = *handle;
1196 	/* Walk skipping *handle number of instances */
1197 
1198 	/* Look if there is a matching stack instance */
1199 	mutex_enter(&netstack_g_lock);
1200 	ns = netstack_head;
1201 	for (i = 0; i < end; i++) {
1202 		if (ns == NULL)
1203 			break;
1204 		ns = ns->netstack_next;
1205 	}
1206 	/* skip those with that aren't really here */
1207 	while (ns != NULL) {
1208 		mutex_enter(&ns->netstack_lock);
1209 		if ((ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) == 0) {
1210 			mutex_exit(&ns->netstack_lock);
1211 			break;
1212 		}
1213 		mutex_exit(&ns->netstack_lock);
1214 		end++;
1215 		ns = ns->netstack_next;
1216 	}
1217 	if (ns != NULL) {
1218 		*handle = end + 1;
1219 		netstack_hold(ns);
1220 	}
1221 	mutex_exit(&netstack_g_lock);
1222 	return (ns);
1223 }
1224