xref: /illumos-gate/usr/src/uts/common/os/netstack.c (revision 704ca705399a21dba6e20e147ac68d7a067570e6)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright (c) 2017, Joyent, Inc.  All rights reserved.
 */

#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/vm.h>
#include <sys/proc.h>
#include <sys/tuneable.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/sdt.h>
#include <sys/mutex.h>
#include <sys/bitmap.h>
#include <sys/atomic.h>
#include <sys/sunddi.h>
#include <sys/kobj.h>
#include <sys/disp.h>
#include <vm/seg_kmem.h>
#include <sys/zone.h>
#include <sys/netstack.h>

/*
 * The key with which the zones framework tells us about new zones,
 * which we use to create new stacks.
 */
static zone_key_t netstack_zone_key;

static int	netstack_initialized = 0;

/*
 * Track the registered netstacks.
 * The global lock protects
 * - ns_reg
 * - the list starting at netstack_head and following the netstack_next
 *   pointers.
 */
static kmutex_t netstack_g_lock;

/*
 * Registry of netstacks with their create/shutdown/destroy functions.
 */
static struct netstack_registry	ns_reg[NS_MAX];

/*
 * Global list of existing stacks.  We use this when a new zone with
 * an exclusive IP instance is created.
 *
 * Note that in some cases a netstack_t needs to stay around after the zone
 * has gone away. This is because there might be outstanding references
 * (from TCP TIME_WAIT connections, IPsec state, etc). The netstack_t data
 * structure and all the foo_stack_t's hanging off of it will be cleaned up
 * when the last reference to it is dropped.
 * However, the same zone might be rebooted. That is handled using the
 * assumption that the zones framework picks a new zoneid each time a zone
 * is (re)booted. We assert for that condition in netstack_zone_create().
 * Thus the old netstack_t can take its time for things to time out.
 */
static netstack_t *netstack_head;

/*
 * To support kstat_create_netstack() using kstat_zone_add we need
 * to track both
 *  - all zoneids that use the global/shared stack
 *  - all kstats that have been added for the shared stack
 */
struct shared_zone_list {
	struct shared_zone_list *sz_next;
	zoneid_t		sz_zoneid;
};

struct shared_kstat_list {
	struct shared_kstat_list *sk_next;
	kstat_t			 *sk_kstat;
};

static kmutex_t netstack_shared_lock;	/* protects the following two */
static struct shared_zone_list	*netstack_shared_zones;
static struct shared_kstat_list	*netstack_shared_kstats;

static void	*netstack_zone_create(zoneid_t zoneid);
static void	netstack_zone_shutdown(zoneid_t zoneid, void *arg);
static void	netstack_zone_destroy(zoneid_t zoneid, void *arg);

static void	netstack_shared_zone_add(zoneid_t zoneid);
static void	netstack_shared_zone_remove(zoneid_t zoneid);
static void	netstack_shared_kstat_add(kstat_t *ks);
static void	netstack_shared_kstat_remove(kstat_t *ks);

typedef boolean_t applyfn_t(kmutex_t *, netstack_t *, int);

static void	apply_all_netstacks(int, applyfn_t *);
static void	apply_all_modules(netstack_t *, applyfn_t *);
static void	apply_all_modules_reverse(netstack_t *, applyfn_t *);
static boolean_t netstack_apply_create(kmutex_t *, netstack_t *, int);
static boolean_t netstack_apply_shutdown(kmutex_t *, netstack_t *, int);
static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
    kmutex_t *);

static void netstack_hold_locked(netstack_t *);

static ksema_t netstack_reap_limiter;
/*
 * This is a hard-coded constant; since it cannot be tuned at run time,
 * making it an /etc/system tunable is better than nothing.
 */
uint_t netstack_outstanding_reaps = 1024;
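
/*
 * Editor's note (illustrative, not from the original source): since the
 * semaphore below is initialized from this variable once, at
 * netstack_init() time, the value can only usefully be changed via
 * /etc/system before boot, e.g.:
 *
 *	set netstack_outstanding_reaps = 2048
 */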

void
netstack_init(void)
{
	mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);

	sema_init(&netstack_reap_limiter, netstack_outstanding_reaps, NULL,
	    SEMA_DRIVER, NULL);

	netstack_initialized = 1;

	/*
	 * We want to be informed each time a zone is created or
	 * destroyed in the kernel, so we can maintain the
	 * stack instance information.
	 */
	zone_key_create(&netstack_zone_key, netstack_zone_create,
	    netstack_zone_shutdown, netstack_zone_destroy);
}

/*
 * Register a new module with the framework.
 * This registers interest in changes to the set of netstacks.
 * The createfn and destroyfn are required, but the shutdownfn can be
 * NULL.
 * Note that due to the current zsd implementation, when the create
 * function is called the zone isn't fully present; functions
 * like zone_find_by_* will fail, hence the create function cannot
 * use many zone-related kernel functions, including zcmn_err().
 */
void
netstack_register(int moduleid,
    void *(*module_create)(netstackid_t, netstack_t *),
    void (*module_shutdown)(netstackid_t, void *),
    void (*module_destroy)(netstackid_t, void *))
{
	netstack_t *ns;

	ASSERT(netstack_initialized);
	ASSERT(moduleid >= 0 && moduleid < NS_MAX);
	ASSERT(module_create != NULL);

	/*
	 * Make instances created after this point in time run the create
	 * callback.
	 */
	mutex_enter(&netstack_g_lock);
	ASSERT(ns_reg[moduleid].nr_create == NULL);
	ASSERT(ns_reg[moduleid].nr_flags == 0);
	ns_reg[moduleid].nr_create = module_create;
	ns_reg[moduleid].nr_shutdown = module_shutdown;
	ns_reg[moduleid].nr_destroy = module_destroy;
	ns_reg[moduleid].nr_flags = NRF_REGISTERED;

	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_CREATE_NEEDED for each of those.
	 * netstacks which have been deleted will have NSS_CREATE_COMPLETED
	 * set, but check NSF_CLOSING to be sure.
	 */
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);
		if (!(ns->netstack_flags & NSF_CLOSING) &&
		    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
			nms->nms_flags |= NSS_CREATE_NEEDED;
			DTRACE_PROBE2(netstack__create__needed,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}
	mutex_exit(&netstack_g_lock);

	/*
	 * At this point in time a new instance can be created or an instance
	 * can be destroyed, or some other module can register or unregister.
	 * Make sure we either run all the create functions for this moduleid
	 * or we wait for any other creators for this moduleid.
	 */
	apply_all_netstacks(moduleid, netstack_apply_create);
}

void
netstack_unregister(int moduleid)
{
	netstack_t *ns;

	ASSERT(moduleid >= 0 && moduleid < NS_MAX);

	ASSERT(ns_reg[moduleid].nr_create != NULL);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);

	mutex_enter(&netstack_g_lock);
	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_SHUTDOWN_NEEDED and NSS_DESTROY_NEEDED for each of those.
	 * That ensures that when we return all the callbacks for existing
	 * instances have completed. And since we set NRF_DYING no new
	 * instances can use this module.
	 */
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		boolean_t created = B_FALSE;
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);

		/*
		 * We need to be careful here. A netstack may be under
		 * creation right now, waiting for us to let go of this
		 * lock to proceed. It may have set NSS_CREATE_NEEDED, but not
		 * have gotten to the point of completing it yet. If
		 * NSS_CREATE_NEEDED is set, we can safely just remove it here
		 * and never create the module. However, if
		 * NSS_CREATE_INPROGRESS is set, we need to still flag this
		 * module for shutdown and deletion, just as though it had
		 * reached NSS_CREATE_COMPLETED.
		 *
		 * It is safe to do that because of two different guarantees
		 * that exist in the system. The first is that before we do a
		 * create, shutdown, or destroy, we ensure that nothing else is
		 * in progress in the system for this netstack and wait for it
		 * to complete. Secondly, because the zone is being created, we
		 * know that the following call to apply_all_netstacks will
		 * block on the zone finishing its initialization.
		 */
		if (nms->nms_flags & NSS_CREATE_NEEDED)
			nms->nms_flags &= ~NSS_CREATE_NEEDED;

		if (nms->nms_flags & NSS_CREATE_INPROGRESS ||
		    nms->nms_flags & NSS_CREATE_COMPLETED)
			created = B_TRUE;

		if (ns_reg[moduleid].nr_shutdown != NULL && created &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, moduleid);
		}
		if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) &&
		    ns_reg[moduleid].nr_destroy != NULL && created &&
		    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
			nms->nms_flags |= NSS_DESTROY_NEEDED;
			DTRACE_PROBE2(netstack__destroy__needed,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}
	/*
	 * Prevent any new netstack from calling the registered create
	 * function, while keeping the function pointers in place until the
	 * shutdown and destroy callbacks are complete.
	 */
	ns_reg[moduleid].nr_flags |= NRF_DYING;
	mutex_exit(&netstack_g_lock);

	apply_all_netstacks(moduleid, netstack_apply_shutdown);
	apply_all_netstacks(moduleid, netstack_apply_destroy);

	/*
	 * Clear the nms_flags so that we can handle this module
	 * being loaded again.
	 * Also remove the registered functions.
	 */
	mutex_enter(&netstack_g_lock);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_DYING);
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);
		if (nms->nms_flags & NSS_DESTROY_COMPLETED) {
			nms->nms_flags = 0;
			DTRACE_PROBE2(netstack__destroy__done,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}

	ns_reg[moduleid].nr_create = NULL;
	ns_reg[moduleid].nr_shutdown = NULL;
	ns_reg[moduleid].nr_destroy = NULL;
	ns_reg[moduleid].nr_flags = 0;
	mutex_exit(&netstack_g_lock);
}
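
/*
 * Editor's sketch (hypothetical, not part of this file): a client module
 * typically registers from its _init() entry point and unregisters from
 * _fini().  NS_FOO, foo_stack_t, foo_stack_init() and foo_stack_fini()
 * are illustrative names; the framework stores the pointer returned by
 * the create callback in ns->netstack_modules[NS_FOO]:
 *
 *	static void *
 *	foo_stack_init(netstackid_t stackid, netstack_t *ns)
 *	{
 *		foo_stack_t *fs;
 *
 *		fs = kmem_zalloc(sizeof (*fs), KM_SLEEP);
 *		return (fs);
 *	}
 *
 *	static void
 *	foo_stack_fini(netstackid_t stackid, void *arg)
 *	{
 *		kmem_free(arg, sizeof (foo_stack_t));
 *	}
 *
 *	int
 *	_init(void)
 *	{
 *		netstack_register(NS_FOO, foo_stack_init, NULL,
 *		    foo_stack_fini);
 *		return (mod_install(&modlinkage));
 *	}
 *
 *	int
 *	_fini(void)
 *	{
 *		int error = mod_remove(&modlinkage);
 *
 *		if (error == 0)
 *			netstack_unregister(NS_FOO);
 *		return (error);
 *	}
 */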

/*
 * Lookup and/or allocate a netstack for this zone.
 */
static void *
netstack_zone_create(zoneid_t zoneid)
{
	netstackid_t stackid;
	netstack_t *ns;
	netstack_t **nsp;
	zone_t	*zone;
	int i;

	ASSERT(netstack_initialized);

	zone = zone_find_by_id_nolock(zoneid);
	ASSERT(zone != NULL);

	if (zone->zone_flags & ZF_NET_EXCL) {
		stackid = zoneid;
	} else {
		/* Use the shared stack instance of the global zone */
		stackid = GLOBAL_NETSTACKID;
	}

	/* Allocate even if it isn't needed; simplifies locking */
	ns = (netstack_t *)kmem_zalloc(sizeof (netstack_t), KM_SLEEP);

	/* Check whether a matching stack instance already exists */
	mutex_enter(&netstack_g_lock);
	for (nsp = &netstack_head; *nsp != NULL;
	    nsp = &((*nsp)->netstack_next)) {
		if ((*nsp)->netstack_stackid == stackid) {
			/*
			 * Should never find a pre-existing exclusive stack
			 */
			VERIFY(stackid == GLOBAL_NETSTACKID);
			kmem_free(ns, sizeof (netstack_t));
			ns = *nsp;
			mutex_enter(&ns->netstack_lock);
			ns->netstack_numzones++;
			mutex_exit(&ns->netstack_lock);
			mutex_exit(&netstack_g_lock);
			DTRACE_PROBE1(netstack__inc__numzones,
			    netstack_t *, ns);
			/* Record that we have a new shared stack zone */
			netstack_shared_zone_add(zoneid);
			zone->zone_netstack = ns;
			return (ns);
		}
	}
	/* Not found */
	mutex_init(&ns->netstack_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&ns->netstack_cv, NULL, CV_DEFAULT, NULL);
	ns->netstack_stackid = zoneid;
	ns->netstack_numzones = 1;
	ns->netstack_refcnt = 1; /* Decremented by netstack_zone_destroy */
	ns->netstack_flags = NSF_UNINIT;
	*nsp = ns;
	zone->zone_netstack = ns;

	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a CREATE running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing create callbacks to complete in moduleid order
	 */
	ns->netstack_flags |= NSF_ZONE_CREATE;

	/*
	 * Determine the set of module create functions that need to be
	 * called before we drop the lock.
	 * Set NSS_CREATE_NEEDED for each of those.
	 * Skip any with NRF_DYING set, since those are in the process of
	 * going away, by checking for flags being exactly NRF_REGISTERED.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		cv_init(&nms->nms_cv, NULL, CV_DEFAULT, NULL);

		if ((ns_reg[i].nr_flags == NRF_REGISTERED) &&
		    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
			nms->nms_flags |= NSS_CREATE_NEEDED;
			DTRACE_PROBE2(netstack__create__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	apply_all_modules(ns, netstack_apply_create);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ns->netstack_flags &= ~NSF_UNINIT;
	ASSERT(ns->netstack_flags & NSF_ZONE_CREATE);
	ns->netstack_flags &= ~NSF_ZONE_CREATE;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);

	return (ns);
}

/* ARGSUSED */
static void
netstack_zone_shutdown(zoneid_t zoneid, void *arg)
{
	netstack_t *ns = (netstack_t *)arg;
	int i;

	ASSERT(arg != NULL);

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_numzones > 0);
	if (ns->netstack_numzones != 1) {
		/* Stack instance being used by other zone */
		mutex_exit(&ns->netstack_lock);
		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
		return;
	}
	mutex_exit(&ns->netstack_lock);

	mutex_enter(&netstack_g_lock);
	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a SHUTDOWN running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing shutdown callbacks to complete in moduleid order
	 */
	ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
	ns->netstack_flags |= NSF_ZONE_SHUTDOWN;

	/*
	 * Determine the set of registered modules before we drop the lock.
	 * Set NSS_SHUTDOWN_NEEDED for each of those.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_shutdown != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	/*
	 * Call the shutdown function for all registered modules for this
	 * netstack.
	 */
	apply_all_modules_reverse(ns, netstack_apply_shutdown);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_flags & NSF_ZONE_SHUTDOWN);
	ns->netstack_flags &= ~NSF_ZONE_SHUTDOWN;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);
}

/*
 * Common routine to release a zone.
 * If this was the last zone using the stack instance then the refcnt
 * dropping to zero will free the stack instance.
 */
/* ARGSUSED */
static void
netstack_zone_destroy(zoneid_t zoneid, void *arg)
{
	netstack_t *ns = (netstack_t *)arg;

	ASSERT(arg != NULL);

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_numzones > 0);
	ns->netstack_numzones--;
	if (ns->netstack_numzones != 0) {
		/* Stack instance being used by other zone */
		mutex_exit(&ns->netstack_lock);
		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
		/* Record that a shared stack zone has gone away */
		netstack_shared_zone_remove(zoneid);
		return;
	}
	/*
	 * Set CLOSING so that netstack_find_by will not find it.
	 */
	ns->netstack_flags |= NSF_CLOSING;
	mutex_exit(&ns->netstack_lock);
	DTRACE_PROBE1(netstack__dec__numzones, netstack_t *, ns);
	/* No other thread can call zone_destroy for this stack */

	/*
	 * Decrease refcnt to account for the reference taken in
	 * netstack_zone_create().
	 */
	netstack_rele(ns);
}

/*
 * Called when the reference count drops to zero.
 * Call the destroy functions for each registered module.
 */
static void
netstack_stack_inactive(netstack_t *ns)
{
	int i;

	mutex_enter(&netstack_g_lock);
	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a DESTROY running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing destroy callbacks to complete in reverse moduleid order
	 */
	ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
	ns->netstack_flags |= NSF_ZONE_DESTROY;
	/*
	 * If the shutdown callback wasn't called earlier (e.g., if this is
	 * a netstack shared between multiple zones), then we schedule it now.
	 *
	 * Determine the set of registered modules before we drop the lock.
	 * Set NSS_DESTROY_NEEDED for each of those. That
	 * ensures that when we return all the callbacks for existing
	 * instances have completed.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_shutdown != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, i);
		}

		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_destroy != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
			nms->nms_flags |= NSS_DESTROY_NEEDED;
			DTRACE_PROBE2(netstack__destroy__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	/*
	 * Call the shutdown and destroy functions for all registered modules
	 * for this netstack.
	 *
	 * Since there are some ordering dependencies between the modules we
	 * tear them down in the reverse order of what was used to create them.
	 *
	 * Since a netstack_t is never reused (when a zone is rebooted it gets
	 * a new zoneid == netstackid i.e. a new netstack_t is allocated) we
	 * leave nms_flags the way it is i.e. with NSS_DESTROY_COMPLETED set.
	 * That is different than in the netstack_unregister() case.
	 */
	apply_all_modules_reverse(ns, netstack_apply_shutdown);
	apply_all_modules_reverse(ns, netstack_apply_destroy);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_flags & NSF_ZONE_DESTROY);
	ns->netstack_flags &= ~NSF_ZONE_DESTROY;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);
}

/*
 * Apply a function to all netstacks for a particular moduleid.
 *
 * If there is any zone activity (due to a zone being created, shutdown,
 * or destroyed) we wait for that to complete before we proceed. This ensures
 * that the moduleids are processed in order when a zone is created or
 * destroyed.
 *
 * The applyfn has to drop netstack_g_lock if it does some work.
 * In that case we don't follow netstack_next,
 * even if it is possible to do so without any hazards. This is
 * because we want the design to allow for the list of netstacks threaded
 * by netstack_next to change in any arbitrary way during the time the
 * lock was dropped.
 *
 * It is safe to restart the loop at netstack_head since the applyfn
 * changes netstack_m_state as it processes things, so a subsequent
 * pass through will have no effect in applyfn, hence the loop will terminate
 * in at worst O(N^2).
 */
static void
apply_all_netstacks(int moduleid, applyfn_t *applyfn)
{
	netstack_t *ns;

	mutex_enter(&netstack_g_lock);
	ns = netstack_head;
	while (ns != NULL) {
		if (wait_for_zone_creator(ns, &netstack_g_lock)) {
			/* Lock dropped - restart at head */
			ns = netstack_head;
		} else if ((applyfn)(&netstack_g_lock, ns, moduleid)) {
			/* Lock dropped - restart at head */
			ns = netstack_head;
		} else {
			ns = ns->netstack_next;
		}
	}
	mutex_exit(&netstack_g_lock);
}

/*
 * Apply a function to all moduleids for a particular netstack.
 *
 * Since the netstack linkage doesn't matter in this case we can
 * ignore whether the function drops the lock.
 */
static void
apply_all_modules(netstack_t *ns, applyfn_t *applyfn)
{
	int i;

	mutex_enter(&netstack_g_lock);
	for (i = 0; i < NS_MAX; i++) {
		/*
		 * We don't care whether the lock was dropped
		 * since we are not iterating over netstack_head.
		 */
		(void) (applyfn)(&netstack_g_lock, ns, i);
	}
	mutex_exit(&netstack_g_lock);
}

/* Like the above but in reverse moduleid order */
static void
apply_all_modules_reverse(netstack_t *ns, applyfn_t *applyfn)
{
	int i;

	mutex_enter(&netstack_g_lock);
	for (i = NS_MAX - 1; i >= 0; i--) {
		/*
		 * We don't care whether the lock was dropped
		 * since we are not iterating over netstack_head.
		 */
		(void) (applyfn)(&netstack_g_lock, ns, i);
	}
	mutex_exit(&netstack_g_lock);
}

/*
 * Call the create function for the ns and moduleid if CREATE_NEEDED
 * is set.
 * If some other thread gets here first and sets *_INPROGRESS, then
 * we wait for that thread to complete so that we can ensure that
 * all the callbacks are done when we've looped over all netstacks/moduleids.
 *
 * When we call the create function, we temporarily drop the netstack_lock
 * held by the caller, and return true to tell the caller it needs to
 * re-evaluate the state.
 */
static boolean_t
netstack_apply_create(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	void *result;
	netstackid_t stackid;
	nm_state_t *nms = &ns->netstack_m_state[moduleid];
	boolean_t dropped = B_FALSE;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);

	if (wait_for_nms_inprogress(ns, nms, lockp))
		dropped = B_TRUE;

	if (nms->nms_flags & NSS_CREATE_NEEDED) {
		nms->nms_flags &= ~NSS_CREATE_NEEDED;
		nms->nms_flags |= NSS_CREATE_INPROGRESS;
		DTRACE_PROBE2(netstack__create__inprogress,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		mutex_exit(lockp);
		dropped = B_TRUE;

		ASSERT(ns_reg[moduleid].nr_create != NULL);
		stackid = ns->netstack_stackid;
		DTRACE_PROBE2(netstack__create__start,
		    netstackid_t, stackid,
		    netstack_t *, ns);
		result = (ns_reg[moduleid].nr_create)(stackid, ns);
		DTRACE_PROBE2(netstack__create__end,
		    void *, result, netstack_t *, ns);

		ASSERT(result != NULL);
		mutex_enter(lockp);
		mutex_enter(&ns->netstack_lock);
		ns->netstack_modules[moduleid] = result;
		nms->nms_flags &= ~NSS_CREATE_INPROGRESS;
		nms->nms_flags |= NSS_CREATE_COMPLETED;
		cv_broadcast(&nms->nms_cv);
		DTRACE_PROBE2(netstack__create__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	} else {
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	}
}

/*
 * Call the shutdown function for the ns and moduleid if SHUTDOWN_NEEDED
 * is set.
 * If some other thread gets here first and sets *_INPROGRESS, then
 * we wait for that thread to complete so that we can ensure that
 * all the callbacks are done when we've looped over all netstacks/moduleids.
 *
 * When we call the shutdown function, we temporarily drop the netstack_lock
 * held by the caller, and return true to tell the caller it needs to
 * re-evaluate the state.
 */
static boolean_t
netstack_apply_shutdown(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	netstackid_t stackid;
	void *netstack_module;
	nm_state_t *nms = &ns->netstack_m_state[moduleid];
	boolean_t dropped = B_FALSE;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);

	if (wait_for_nms_inprogress(ns, nms, lockp))
		dropped = B_TRUE;

	if (nms->nms_flags & NSS_SHUTDOWN_NEEDED) {
		nms->nms_flags &= ~NSS_SHUTDOWN_NEEDED;
		nms->nms_flags |= NSS_SHUTDOWN_INPROGRESS;
		DTRACE_PROBE2(netstack__shutdown__inprogress,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		mutex_exit(lockp);
		dropped = B_TRUE;

		ASSERT(ns_reg[moduleid].nr_shutdown != NULL);
		stackid = ns->netstack_stackid;
		netstack_module = ns->netstack_modules[moduleid];
		DTRACE_PROBE2(netstack__shutdown__start,
		    netstackid_t, stackid,
		    void *, netstack_module);
		(ns_reg[moduleid].nr_shutdown)(stackid, netstack_module);
		DTRACE_PROBE1(netstack__shutdown__end,
		    netstack_t *, ns);

		mutex_enter(lockp);
		mutex_enter(&ns->netstack_lock);
		nms->nms_flags &= ~NSS_SHUTDOWN_INPROGRESS;
		nms->nms_flags |= NSS_SHUTDOWN_COMPLETED;
		cv_broadcast(&nms->nms_cv);
		DTRACE_PROBE2(netstack__shutdown__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	} else {
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	}
}

/*
 * Call the destroy function for the ns and moduleid if DESTROY_NEEDED
 * is set.
 * If some other thread gets here first and sets *_INPROGRESS, then
 * we wait for that thread to complete so that we can ensure that
 * all the callbacks are done when we've looped over all netstacks/moduleids.
 *
 * When we call the destroy function, we temporarily drop the netstack_lock
 * held by the caller, and return true to tell the caller it needs to
 * re-evaluate the state.
 */
static boolean_t
netstack_apply_destroy(kmutex_t *lockp, netstack_t *ns, int moduleid)
{
	netstackid_t stackid;
	void *netstack_module;
	nm_state_t *nms = &ns->netstack_m_state[moduleid];
	boolean_t dropped = B_FALSE;

	ASSERT(MUTEX_HELD(lockp));
	mutex_enter(&ns->netstack_lock);

	if (wait_for_nms_inprogress(ns, nms, lockp))
		dropped = B_TRUE;

	if (nms->nms_flags & NSS_DESTROY_NEEDED) {
		nms->nms_flags &= ~NSS_DESTROY_NEEDED;
		nms->nms_flags |= NSS_DESTROY_INPROGRESS;
		DTRACE_PROBE2(netstack__destroy__inprogress,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		mutex_exit(lockp);
		dropped = B_TRUE;

		ASSERT(ns_reg[moduleid].nr_destroy != NULL);
		stackid = ns->netstack_stackid;
		netstack_module = ns->netstack_modules[moduleid];
		DTRACE_PROBE2(netstack__destroy__start,
		    netstackid_t, stackid,
		    void *, netstack_module);
		(ns_reg[moduleid].nr_destroy)(stackid, netstack_module);
		DTRACE_PROBE1(netstack__destroy__end,
		    netstack_t *, ns);

		mutex_enter(lockp);
		mutex_enter(&ns->netstack_lock);
		ns->netstack_modules[moduleid] = NULL;
		nms->nms_flags &= ~NSS_DESTROY_INPROGRESS;
		nms->nms_flags |= NSS_DESTROY_COMPLETED;
		cv_broadcast(&nms->nms_cv);
		DTRACE_PROBE2(netstack__destroy__completed,
		    netstack_t *, ns, int, moduleid);
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	} else {
		mutex_exit(&ns->netstack_lock);
		return (dropped);
	}
}

/*
 * If somebody is creating the netstack (due to a new zone being created)
 * then we wait for them to complete. This ensures that any additional
 * netstack_register() doesn't cause the create functions to run out of
 * order.
 * Note that we do not need such a global wait in the case of the shutdown
 * and destroy callbacks, since in that case it is sufficient for both
 * threads to set NEEDED and wait for INPROGRESS to ensure ordering.
 * Returns true if lockp was temporarily dropped while waiting.
 */
static boolean_t
wait_for_zone_creator(netstack_t *ns, kmutex_t *lockp)
{
	boolean_t dropped = B_FALSE;

	mutex_enter(&ns->netstack_lock);
	while (ns->netstack_flags & NSF_ZONE_CREATE) {
		DTRACE_PROBE1(netstack__wait__zone__inprogress,
		    netstack_t *, ns);
		if (lockp != NULL) {
			dropped = B_TRUE;
			mutex_exit(lockp);
		}
		cv_wait(&ns->netstack_cv, &ns->netstack_lock);
		if (lockp != NULL) {
			/* First drop netstack_lock to preserve order */
			mutex_exit(&ns->netstack_lock);
			mutex_enter(lockp);
			mutex_enter(&ns->netstack_lock);
		}
	}
	mutex_exit(&ns->netstack_lock);
	return (dropped);
}

/*
 * Wait for any INPROGRESS flag to be cleared for the netstack/moduleid
 * combination.
 * Returns true if lockp was temporarily dropped while waiting.
 */
static boolean_t
wait_for_nms_inprogress(netstack_t *ns, nm_state_t *nms, kmutex_t *lockp)
{
	boolean_t dropped = B_FALSE;

	while (nms->nms_flags & NSS_ALL_INPROGRESS) {
		DTRACE_PROBE2(netstack__wait__nms__inprogress,
		    netstack_t *, ns, nm_state_t *, nms);
		if (lockp != NULL) {
			dropped = B_TRUE;
			mutex_exit(lockp);
		}
		cv_wait(&nms->nms_cv, &ns->netstack_lock);
		if (lockp != NULL) {
			/* First drop netstack_lock to preserve order */
			mutex_exit(&ns->netstack_lock);
			mutex_enter(lockp);
			mutex_enter(&ns->netstack_lock);
		}
	}
	return (dropped);
}

/*
 * Get the stack instance used in caller's zone.
 * Increases the reference count; the caller must do a netstack_rele().
 * Returns NULL (and takes no reference) if the stack is uninitialized
 * or closing.
 * It can't be called after zone_destroy() has started.
 */
netstack_t *
netstack_get_current(void)
{
	netstack_t *ns;

	ns = curproc->p_zone->zone_netstack;
	ASSERT(ns != NULL);
	return (netstack_hold_if_active(ns));
}
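
/*
 * Editor's sketch of the usual hold/rele pattern for a caller in process
 * context (illustrative only; a NULL return means the caller's stack is
 * uninitialized or closing and no reference was taken):
 *
 *	netstack_t *ns;
 *
 *	if ((ns = netstack_get_current()) == NULL)
 *		return (ENXIO);
 *	do something with ns or ns->netstack_modules[moduleid];
 *	netstack_rele(ns);
 */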

/*
 * Find a stack instance given the cred.
 * This is used by the modules to potentially allow for a future when
 * something other than the zoneid is used to determine the stack.
 */
netstack_t *
netstack_find_by_cred(const cred_t *cr)
{
	zoneid_t zoneid = crgetzoneid(cr);

	/* Handle the case when cr_zone is NULL */
	if (zoneid == (zoneid_t)-1)
		zoneid = GLOBAL_ZONEID;

	/* For performance ... */
	if (curproc->p_zone->zone_id == zoneid)
		return (netstack_get_current());
	else
		return (netstack_find_by_zoneid(zoneid));
}

/*
 * Find a stack instance given the zoneid.
 * Increases the reference count if found; caller must do a
 * netstack_rele().
 *
 * If there is no exact match then assume the shared stack instance
 * matches.
 *
 * Skip the uninitialized and closing ones.
 */
netstack_t *
netstack_find_by_zoneid(zoneid_t zoneid)
{
	netstack_t *ns;
	zone_t *zone;

	zone = zone_find_by_id(zoneid);

	if (zone == NULL)
		return (NULL);

	ASSERT(zone->zone_netstack != NULL);
	ns = netstack_hold_if_active(zone->zone_netstack);

	zone_rele(zone);
	return (ns);
}

/*
 * Find a stack instance given the zoneid. Can only be called from
 * the create callback. See the comments in zone_find_by_id_nolock why
 * that limitation exists.
 *
 * Increases the reference count if found; caller must do a
 * netstack_rele().
 *
 * If there is no exact match then assume the shared stack instance
 * matches.
 *
 * Skip the uninitialized ones.
 */
netstack_t *
netstack_find_by_zoneid_nolock(zoneid_t zoneid)
{
	zone_t *zone;

	zone = zone_find_by_id_nolock(zoneid);

	if (zone == NULL)
		return (NULL);

	ASSERT(zone->zone_netstack != NULL);
	/* zone_find_by_id_nolock does not have a hold on the zone */
	return (netstack_hold_if_active(zone->zone_netstack));
}

/*
 * Find a stack instance given the stackid (exact match only).
 * Increases the reference count if found; caller must do a
 * netstack_rele().
 *
 * Skip the uninitialized ones.
 */
netstack_t *
netstack_find_by_stackid(netstackid_t stackid)
{
	netstack_t *ns;

	mutex_enter(&netstack_g_lock);
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		/* Can't use hold_if_active because of stackid check. */
		mutex_enter(&ns->netstack_lock);
		if (ns->netstack_stackid == stackid &&
		    !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
			netstack_hold_locked(ns);
			mutex_exit(&ns->netstack_lock);
			mutex_exit(&netstack_g_lock);
			return (ns);
		}
		mutex_exit(&ns->netstack_lock);
	}
	mutex_exit(&netstack_g_lock);
	return (NULL);
}

boolean_t
netstack_inuse_by_stackid(netstackid_t stackid)
{
	netstack_t *ns;
	boolean_t rval = B_FALSE;

	mutex_enter(&netstack_g_lock);

	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		if (ns->netstack_stackid == stackid) {
			rval = B_TRUE;
			break;
		}
	}

	mutex_exit(&netstack_g_lock);

	return (rval);
}


static void
netstack_reap(void *arg)
{
	netstack_t **nsp, *ns = (netstack_t *)arg;
	boolean_t found;
	int i;

	/*
	 * Time to call the destroy functions and free up
	 * the structure
	 */
	netstack_stack_inactive(ns);

	/* Make sure nothing increased the references */
	ASSERT(ns->netstack_refcnt == 0);
	ASSERT(ns->netstack_numzones == 0);

	/* Finally remove from list of netstacks */
	mutex_enter(&netstack_g_lock);
	found = B_FALSE;
	for (nsp = &netstack_head; *nsp != NULL;
	    nsp = &(*nsp)->netstack_next) {
		if (*nsp == ns) {
			*nsp = ns->netstack_next;
			ns->netstack_next = NULL;
			found = B_TRUE;
			break;
		}
	}
	ASSERT(found);
	mutex_exit(&netstack_g_lock);

	/* Make sure nothing increased the references */
	ASSERT(ns->netstack_refcnt == 0);
	ASSERT(ns->netstack_numzones == 0);

	ASSERT(ns->netstack_flags & NSF_CLOSING);

	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		cv_destroy(&nms->nms_cv);
	}
	mutex_destroy(&ns->netstack_lock);
	cv_destroy(&ns->netstack_cv);
	kmem_free(ns, sizeof (*ns));
	/* Allow another reap to be scheduled. */
	sema_v(&netstack_reap_limiter);
}

void
netstack_rele(netstack_t *ns)
{
	int refcnt, numzones;

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_refcnt > 0);
	ns->netstack_refcnt--;
	/*
	 * As we drop the lock additional netstack_rele()s can come in
	 * and decrement the refcnt to zero and free the netstack_t.
	 * Store pointers in local variables and if we were not the last
	 * then don't reference the netstack_t after that.
	 */
	refcnt = ns->netstack_refcnt;
	numzones = ns->netstack_numzones;
	DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
	mutex_exit(&ns->netstack_lock);

	if (refcnt == 0 && numzones == 0) {
		/*
		 * Because there are possibilities of re-entrancy in various
		 * netstack structures by callers, which might cause a lock up
		 * due to odd reference models, or other factors, we choose to
		 * schedule the actual deletion of this netstack as a deferred
		 * task on the system taskq.  This way, any such reference
		 * models won't trip over themselves.
		 *
		 * Assume we aren't in a high-priority interrupt context, so
		 * we can use KM_SLEEP and semaphores.
		 */
		if (sema_tryp(&netstack_reap_limiter) == 0) {
			/*
			 * Indicate we're slamming against a limit.
			 */
			hrtime_t measurement = gethrtime();

			sema_p(&netstack_reap_limiter);
			/* Capture delay in ns. */
			DTRACE_PROBE1(netstack__reap__rate__limited,
			    hrtime_t, gethrtime() - measurement);
		}

		/* TQ_SLEEP should prevent taskq_dispatch() from failing. */
		(void) taskq_dispatch(system_taskq, netstack_reap, ns,
		    TQ_SLEEP);
	}
}

static void
netstack_hold_locked(netstack_t *ns)
{
	ASSERT(MUTEX_HELD(&ns->netstack_lock));
	ns->netstack_refcnt++;
	ASSERT(ns->netstack_refcnt > 0);
	DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
}

/*
 * If the passed-in netstack isn't active (i.e. it's uninitialized or closing),
 * return NULL, otherwise return it with its reference held.  Common code
 * for many netstack_find*() functions.
 */
netstack_t *
netstack_hold_if_active(netstack_t *ns)
{
	netstack_t *retval;

	mutex_enter(&ns->netstack_lock);
	if (ns->netstack_flags & (NSF_UNINIT | NSF_CLOSING)) {
		retval = NULL;
	} else {
		netstack_hold_locked(ns);
		retval = ns;
	}
	mutex_exit(&ns->netstack_lock);

	return (retval);
}

void
netstack_hold(netstack_t *ns)
{
	mutex_enter(&ns->netstack_lock);
	netstack_hold_locked(ns);
	mutex_exit(&ns->netstack_lock);
}

/*
 * To support kstat_create_netstack() using kstat_zone_add we need
 * to track both
 *  - all zoneids that use the global/shared stack
 *  - all kstats that have been added for the shared stack
 */
kstat_t *
kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
    char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
    netstackid_t ks_netstackid)
{
	kstat_t *ks;

	if (ks_netstackid == GLOBAL_NETSTACKID) {
		ks = kstat_create_zone(ks_module, ks_instance, ks_name,
		    ks_class, ks_type, ks_ndata, ks_flags, GLOBAL_ZONEID);
		if (ks != NULL)
			netstack_shared_kstat_add(ks);
		return (ks);
	} else {
		zoneid_t zoneid = ks_netstackid;

		return (kstat_create_zone(ks_module, ks_instance, ks_name,
		    ks_class, ks_type, ks_ndata, ks_flags, zoneid));
	}
}

void
kstat_delete_netstack(kstat_t *ks, netstackid_t ks_netstackid)
{
	if (ks_netstackid == GLOBAL_NETSTACKID) {
		netstack_shared_kstat_remove(ks);
	}
	kstat_delete(ks);
}
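
/*
 * Editor's sketch (hypothetical "foo" kstat): a per-stack kstat is keyed
 * by the netstackid so that, for the shared stack, it is made visible in
 * every zone using that stack.  "foo", "foostat" and nstats are
 * illustrative names, not from this file:
 *
 *	kstat_t *ks;
 *
 *	ks = kstat_create_netstack("foo", 0, "foostat", "net",
 *	    KSTAT_TYPE_NAMED, nstats, 0, stackid);
 *	if (ks != NULL) {
 *		initialize ks->ks_data;
 *		kstat_install(ks);
 *	}
 *	...
 *	kstat_delete_netstack(ks, stackid);
 */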

static void
netstack_shared_zone_add(zoneid_t zoneid)
{
	struct shared_zone_list *sz;
	struct shared_kstat_list *sk;

	sz = (struct shared_zone_list *)kmem_zalloc(sizeof (*sz), KM_SLEEP);
	sz->sz_zoneid = zoneid;

	/* Insert in list */
	mutex_enter(&netstack_shared_lock);
	sz->sz_next = netstack_shared_zones;
	netstack_shared_zones = sz;

	/*
	 * Perform kstat_zone_add for each existing shared stack kstat.
	 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
	 */
	for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
		kstat_zone_add(sk->sk_kstat, zoneid);
	}
	mutex_exit(&netstack_shared_lock);
}

static void
netstack_shared_zone_remove(zoneid_t zoneid)
{
	struct shared_zone_list **szp, *sz;
	struct shared_kstat_list *sk;

	/* Find in list */
	mutex_enter(&netstack_shared_lock);
	sz = NULL;
	for (szp = &netstack_shared_zones; *szp != NULL;
	    szp = &((*szp)->sz_next)) {
		if ((*szp)->sz_zoneid == zoneid) {
			sz = *szp;
			break;
		}
	}
	/* We must find it */
	ASSERT(sz != NULL);
	*szp = sz->sz_next;
	sz->sz_next = NULL;

	/*
	 * Perform kstat_zone_remove for each existing shared stack kstat.
	 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
	 */
	for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
		kstat_zone_remove(sk->sk_kstat, zoneid);
	}
	mutex_exit(&netstack_shared_lock);

	kmem_free(sz, sizeof (*sz));
}

static void
netstack_shared_kstat_add(kstat_t *ks)
{
	struct shared_zone_list *sz;
	struct shared_kstat_list *sk;

	sk = (struct shared_kstat_list *)kmem_zalloc(sizeof (*sk), KM_SLEEP);
	sk->sk_kstat = ks;

	/* Insert in list */
	mutex_enter(&netstack_shared_lock);
	sk->sk_next = netstack_shared_kstats;
	netstack_shared_kstats = sk;

	/*
	 * Perform kstat_zone_add for each existing shared stack zone.
	 * Note: Holds netstack_shared_lock lock across kstat_zone_add.
	 */
	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
		kstat_zone_add(ks, sz->sz_zoneid);
	}
	mutex_exit(&netstack_shared_lock);
}

static void
netstack_shared_kstat_remove(kstat_t *ks)
{
	struct shared_zone_list *sz;
	struct shared_kstat_list **skp, *sk;

	/* Find in list */
	mutex_enter(&netstack_shared_lock);
	sk = NULL;
	for (skp = &netstack_shared_kstats; *skp != NULL;
	    skp = &((*skp)->sk_next)) {
		if ((*skp)->sk_kstat == ks) {
			sk = *skp;
			break;
		}
	}
	/* Must find it */
	ASSERT(sk != NULL);
	*skp = sk->sk_next;
	sk->sk_next = NULL;

	/*
	 * Perform kstat_zone_remove for each existing shared stack zone.
	 * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
	 */
	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
		kstat_zone_remove(ks, sz->sz_zoneid);
	}
	mutex_exit(&netstack_shared_lock);
	kmem_free(sk, sizeof (*sk));
}

/*
 * If a zoneid uses the shared stack, return true
 */
static boolean_t
netstack_find_shared_zoneid(zoneid_t zoneid)
{
	struct shared_zone_list *sz;

	mutex_enter(&netstack_shared_lock);
	for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
		if (sz->sz_zoneid == zoneid) {
			mutex_exit(&netstack_shared_lock);
			return (B_TRUE);
		}
	}
	mutex_exit(&netstack_shared_lock);
	return (B_FALSE);
}

/*
 * Hide the fact that zoneids and netstackids are allocated from
 * the same space in the current implementation.
 * We currently do not check that the stackid/zoneids are valid, since there
 * is no need for that. But this should only be done for ids that are
 * valid.
 */
zoneid_t
netstackid_to_zoneid(netstackid_t stackid)
{
	return (stackid);
}

netstackid_t
zoneid_to_netstackid(zoneid_t zoneid)
{
	if (netstack_find_shared_zoneid(zoneid))
		return (GLOBAL_ZONEID);
	else
		return (zoneid);
}

zoneid_t
netstack_get_zoneid(netstack_t *ns)
{
	return (netstackid_to_zoneid(ns->netstack_stackid));
}

/*
 * Simplistic support for walking all the handles.
 * Example usage:
 *	netstack_handle_t nh;
 *	netstack_t *ns;
 *
 *	netstack_next_init(&nh);
 *	while ((ns = netstack_next(&nh)) != NULL) {
 *		do something;
 *		netstack_rele(ns);
 *	}
 *	netstack_next_fini(&nh);
 */
void
netstack_next_init(netstack_handle_t *handle)
{
	*handle = 0;
}

/* ARGSUSED */
void
netstack_next_fini(netstack_handle_t *handle)
{
}

netstack_t *
netstack_next(netstack_handle_t *handle)
{
	netstack_t *ns;
	int i, end;

	end = *handle;
	/* Walk skipping *handle number of instances */

	/* Check whether there is a matching stack instance */
	mutex_enter(&netstack_g_lock);
	ns = netstack_head;
	for (i = 0; i < end; i++) {
		if (ns == NULL)
			break;
		ns = ns->netstack_next;
	}
	/*
	 * Skip those that aren't really here (uninitialized or closing).
	 * Can't use hold_if_active because of "end" tracking.
	 */
	while (ns != NULL) {
		mutex_enter(&ns->netstack_lock);
		if ((ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) == 0) {
			*handle = end + 1;
			netstack_hold_locked(ns);
			mutex_exit(&ns->netstack_lock);
			break;
		}
		mutex_exit(&ns->netstack_lock);
		end++;
		ns = ns->netstack_next;
	}
	mutex_exit(&netstack_g_lock);
	return (ns);
}
1457