/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright (c) 2017, Joyent, Inc. All rights reserved.
 */

#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/vm.h>
#include <sys/proc.h>
#include <sys/tuneable.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/sdt.h>
#include <sys/mutex.h>
#include <sys/bitmap.h>
#include <sys/atomic.h>
#include <sys/sunddi.h>
#include <sys/kobj.h>
#include <sys/disp.h>
#include <vm/seg_kmem.h>
#include <sys/zone.h>
#include <sys/netstack.h>

/*
 * What we use so that the zones framework can tell us about new zones,
 * which we use to create new stacks.
 */
static zone_key_t netstack_zone_key;

static int	netstack_initialized = 0;

/*
 * Track the registered netstacks.
 * The global lock protects
 * - ns_reg
 * - the list starting at netstack_head and following the netstack_next
 *   pointers.
 */
static kmutex_t netstack_g_lock;

/*
 * Registry of netstacks with their create/shutdown/destroy functions.
 */
static struct netstack_registry	ns_reg[NS_MAX];

/*
 * Global list of existing stacks.  We use this when a new zone with
 * an exclusive IP instance is created.
 *
 * Note that in some cases a netstack_t needs to stay around after the zone
 * has gone away.  This is because there might be outstanding references
 * (from TCP TIME_WAIT connections, IPsec state, etc).  The netstack_t data
 * structure and all the foo_stack_t's hanging off of it will be cleaned up
 * when the last reference to it is dropped.
 * However, the same zone might be rebooted.  That is handled using the
 * assumption that the zones framework picks a new zoneid each time a zone
 * is (re)booted.  We assert for that condition in netstack_zone_create().
 * Thus the old netstack_t can take its time for things to time out.
 */
static netstack_t *netstack_head;

/*
 * To support kstat_create_netstack() using kstat_zone_add we need
 * to track both
 * - all zoneids that use the global/shared stack
 * - all kstats that have been added for the shared stack
 */
struct shared_zone_list {
	struct shared_zone_list *sz_next;
	zoneid_t		sz_zoneid;
};

struct shared_kstat_list {
	struct shared_kstat_list *sk_next;
	kstat_t			*sk_kstat;
};

static kmutex_t netstack_shared_lock;	/* protects the following two */
static struct shared_zone_list	*netstack_shared_zones;
static struct shared_kstat_list	*netstack_shared_kstats;

static void	*netstack_zone_create(zoneid_t zoneid);
static void	netstack_zone_shutdown(zoneid_t zoneid, void *arg);
static void	netstack_zone_destroy(zoneid_t zoneid, void *arg);

static void	netstack_shared_zone_add(zoneid_t zoneid);
static void	netstack_shared_zone_remove(zoneid_t zoneid);
static void	netstack_shared_kstat_add(kstat_t *ks);
static void	netstack_shared_kstat_remove(kstat_t *ks);

typedef boolean_t applyfn_t(kmutex_t *, netstack_t *, int);

static void	apply_all_netstacks(int, applyfn_t *);
static void	apply_all_modules(netstack_t *, applyfn_t *);
static void	apply_all_modules_reverse(netstack_t *, applyfn_t *);
static boolean_t netstack_apply_create(kmutex_t *, netstack_t *, int);
static boolean_t netstack_apply_shutdown(kmutex_t *, netstack_t *, int);
static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
    kmutex_t *);

static void netstack_hold_locked(netstack_t *);

static ksema_t netstack_reap_limiter;
/*
 * Hard-coded constant, but since this is not tunable in real-time, it
 * seems making it an /etc/system tunable is better than nothing.
 */
uint_t netstack_outstanding_reaps = 1024;

void
netstack_init(void)
{
	mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);

	sema_init(&netstack_reap_limiter, netstack_outstanding_reaps, NULL,
	    SEMA_DRIVER, NULL);

	netstack_initialized = 1;

	/*
	 * We want to be informed each time a zone is created or
	 * destroyed in the kernel, so we can maintain the
	 * stack instance information.
	 */
	zone_key_create(&netstack_zone_key, netstack_zone_create,
	    netstack_zone_shutdown, netstack_zone_destroy);
}

/*
 * Register a new module with the framework.
 * This registers interest in changes to the set of netstacks.
 * The createfn and destroyfn are required, but the shutdownfn can be
 * NULL.
 * Note that due to the current zsd implementation, when the create
 * function is called the zone isn't fully present; functions like
 * zone_find_by_* will fail, so the create function cannot use many
 * zones framework functions, including zcmn_err().
 */
void
netstack_register(int moduleid,
    void *(*module_create)(netstackid_t, netstack_t *),
    void (*module_shutdown)(netstackid_t, void *),
    void (*module_destroy)(netstackid_t, void *))
{
	netstack_t *ns;

	ASSERT(netstack_initialized);
	ASSERT(moduleid >= 0 && moduleid < NS_MAX);
	ASSERT(module_create != NULL);

	/*
	 * Make instances created after this point in time run the create
	 * callback.
	 */
	mutex_enter(&netstack_g_lock);
	ASSERT(ns_reg[moduleid].nr_create == NULL);
	ASSERT(ns_reg[moduleid].nr_flags == 0);
	ns_reg[moduleid].nr_create = module_create;
	ns_reg[moduleid].nr_shutdown = module_shutdown;
	ns_reg[moduleid].nr_destroy = module_destroy;
	ns_reg[moduleid].nr_flags = NRF_REGISTERED;

	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_CREATE_NEEDED for each of those.
	 * netstacks which have been deleted will have NSS_CREATE_COMPLETED
	 * set, but check NSF_CLOSING to be sure.
	 */
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);
		if (!(ns->netstack_flags & NSF_CLOSING) &&
		    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
			nms->nms_flags |= NSS_CREATE_NEEDED;
			DTRACE_PROBE2(netstack__create__needed,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}
	mutex_exit(&netstack_g_lock);

	/*
	 * At this point in time a new instance can be created or an instance
	 * can be destroyed, or some other module can register or unregister.
	 * Make sure we either run all the create functions for this moduleid
	 * or we wait for any other creators for this moduleid.
	 */
	apply_all_netstacks(moduleid, netstack_apply_create);
}
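
/*
 * Example usage (an illustrative sketch only; the NS_FOO moduleid, the
 * foo_stack_t type and the foo_stack_* callbacks are hypothetical, not
 * part of this framework).  A module typically registers from its
 * _init(9E) routine and unregisters from _fini(9E):
 *
 *	static void *
 *	foo_stack_create(netstackid_t stackid, netstack_t *ns)
 *	{
 *		foo_stack_t *fs;
 *
 *		fs = kmem_zalloc(sizeof (foo_stack_t), KM_SLEEP);
 *		fs->fs_stackid = stackid;
 *		return (fs);
 *	}
 *
 *	static void
 *	foo_stack_destroy(netstackid_t stackid, void *arg)
 *	{
 *		kmem_free(arg, sizeof (foo_stack_t));
 *	}
 *
 *	netstack_register(NS_FOO, foo_stack_create, NULL,
 *	    foo_stack_destroy);
 *	...
 *	netstack_unregister(NS_FOO);
 */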

void
netstack_unregister(int moduleid)
{
	netstack_t *ns;

	ASSERT(moduleid >= 0 && moduleid < NS_MAX);

	ASSERT(ns_reg[moduleid].nr_create != NULL);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);

	mutex_enter(&netstack_g_lock);
	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_SHUTDOWN_NEEDED and NSS_DESTROY_NEEDED for each of those.
	 * That ensures that when we return all the callbacks for existing
	 * instances have completed.  And since we set NRF_DYING no new
	 * instances can use this module.
	 */
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		boolean_t created = B_FALSE;
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);

		/*
		 * We need to be careful here.  A netstack could be in the
		 * middle of being created, waiting for us to let go of this
		 * lock before it can proceed.  It may have set
		 * NSS_CREATE_NEEDED, but not have gotten to the point of
		 * completing it yet.  If NSS_CREATE_NEEDED is set, we can
		 * safely just remove it here and never create the module.
		 * However, if NSS_CREATE_INPROGRESS is set, we still need
		 * to flag this module for shutdown and deletion, just as
		 * though it had reached NSS_CREATE_COMPLETED.
		 *
		 * It is safe to do that because of two different guarantees
		 * that exist in the system.  The first is that before we do
		 * a create, shutdown, or destroy, we ensure that nothing
		 * else is in progress in the system for this netstack and
		 * wait for it to complete.  Secondly, because the zone is
		 * being created, we know that the following call to
		 * apply_all_netstacks() will block on the zone finishing
		 * its initialization.
		 */
		if (nms->nms_flags & NSS_CREATE_NEEDED)
			nms->nms_flags &= ~NSS_CREATE_NEEDED;

		if (nms->nms_flags & NSS_CREATE_INPROGRESS ||
		    nms->nms_flags & NSS_CREATE_COMPLETED)
			created = B_TRUE;

		if (ns_reg[moduleid].nr_shutdown != NULL && created &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, moduleid);
		}
		if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) &&
		    ns_reg[moduleid].nr_destroy != NULL && created &&
		    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
			nms->nms_flags |= NSS_DESTROY_NEEDED;
			DTRACE_PROBE2(netstack__destroy__needed,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}
	/*
	 * Prevent any new netstack from calling the registered create
	 * function, while keeping the function pointers in place until the
	 * shutdown and destroy callbacks are complete.
	 */
	ns_reg[moduleid].nr_flags |= NRF_DYING;
	mutex_exit(&netstack_g_lock);

	apply_all_netstacks(moduleid, netstack_apply_shutdown);
	apply_all_netstacks(moduleid, netstack_apply_destroy);

	/*
	 * Clear the nms_flags so that we can handle this module
	 * being loaded again.
	 * Also remove the registered functions.
	 */
	mutex_enter(&netstack_g_lock);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_DYING);
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);
		if (nms->nms_flags & NSS_DESTROY_COMPLETED) {
			nms->nms_flags = 0;
			DTRACE_PROBE2(netstack__destroy__done,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}

	ns_reg[moduleid].nr_create = NULL;
	ns_reg[moduleid].nr_shutdown = NULL;
	ns_reg[moduleid].nr_destroy = NULL;
	ns_reg[moduleid].nr_flags = 0;
	mutex_exit(&netstack_g_lock);
}

/*
 * Lookup and/or allocate a netstack for this zone.
 */
323 */ 324 static void * 325 netstack_zone_create(zoneid_t zoneid) 326 { 327 netstackid_t stackid; 328 netstack_t *ns; 329 netstack_t **nsp; 330 zone_t *zone; 331 int i; 332 333 ASSERT(netstack_initialized); 334 335 zone = zone_find_by_id_nolock(zoneid); 336 ASSERT(zone != NULL); 337 338 if (zone->zone_flags & ZF_NET_EXCL) { 339 stackid = zoneid; 340 } else { 341 /* Look for the stack instance for the global */ 342 stackid = GLOBAL_NETSTACKID; 343 } 344 345 /* Allocate even if it isn't needed; simplifies locking */ 346 ns = (netstack_t *)kmem_zalloc(sizeof (netstack_t), KM_SLEEP); 347 348 /* Look if there is a matching stack instance */ 349 mutex_enter(&netstack_g_lock); 350 for (nsp = &netstack_head; *nsp != NULL; 351 nsp = &((*nsp)->netstack_next)) { 352 if ((*nsp)->netstack_stackid == stackid) { 353 /* 354 * Should never find a pre-existing exclusive stack 355 */ 356 VERIFY(stackid == GLOBAL_NETSTACKID); 357 kmem_free(ns, sizeof (netstack_t)); 358 ns = *nsp; 359 mutex_enter(&ns->netstack_lock); 360 ns->netstack_numzones++; 361 mutex_exit(&ns->netstack_lock); 362 mutex_exit(&netstack_g_lock); 363 DTRACE_PROBE1(netstack__inc__numzones, 364 netstack_t *, ns); 365 /* Record that we have a new shared stack zone */ 366 netstack_shared_zone_add(zoneid); 367 zone->zone_netstack = ns; 368 return (ns); 369 } 370 } 371 /* Not found */ 372 mutex_init(&ns->netstack_lock, NULL, MUTEX_DEFAULT, NULL); 373 cv_init(&ns->netstack_cv, NULL, CV_DEFAULT, NULL); 374 ns->netstack_stackid = zoneid; 375 ns->netstack_numzones = 1; 376 ns->netstack_refcnt = 1; /* Decremented by netstack_zone_destroy */ 377 ns->netstack_flags = NSF_UNINIT; 378 *nsp = ns; 379 zone->zone_netstack = ns; 380 381 mutex_enter(&ns->netstack_lock); 382 /* 383 * Mark this netstack as having a CREATE running so 384 * any netstack_register/netstack_unregister waits for 385 * the existing create callbacks to complete in moduleid order 386 */ 387 ns->netstack_flags |= NSF_ZONE_CREATE; 388 389 /* 390 * Determine the set of module create functions that need to be 391 * called before we drop the lock. 392 * Set NSS_CREATE_NEEDED for each of those. 393 * Skip any with NRF_DYING set, since those are in the process of 394 * going away, by checking for flags being exactly NRF_REGISTERED. 
395 */ 396 for (i = 0; i < NS_MAX; i++) { 397 nm_state_t *nms = &ns->netstack_m_state[i]; 398 399 cv_init(&nms->nms_cv, NULL, CV_DEFAULT, NULL); 400 401 if ((ns_reg[i].nr_flags == NRF_REGISTERED) && 402 (nms->nms_flags & NSS_CREATE_ALL) == 0) { 403 nms->nms_flags |= NSS_CREATE_NEEDED; 404 DTRACE_PROBE2(netstack__create__needed, 405 netstack_t *, ns, int, i); 406 } 407 } 408 mutex_exit(&ns->netstack_lock); 409 mutex_exit(&netstack_g_lock); 410 411 apply_all_modules(ns, netstack_apply_create); 412 413 /* Tell any waiting netstack_register/netstack_unregister to proceed */ 414 mutex_enter(&ns->netstack_lock); 415 ns->netstack_flags &= ~NSF_UNINIT; 416 ASSERT(ns->netstack_flags & NSF_ZONE_CREATE); 417 ns->netstack_flags &= ~NSF_ZONE_CREATE; 418 cv_broadcast(&ns->netstack_cv); 419 mutex_exit(&ns->netstack_lock); 420 421 return (ns); 422 } 423 424 /* ARGSUSED */ 425 static void 426 netstack_zone_shutdown(zoneid_t zoneid, void *arg) 427 { 428 netstack_t *ns = (netstack_t *)arg; 429 int i; 430 431 ASSERT(arg != NULL); 432 433 mutex_enter(&ns->netstack_lock); 434 ASSERT(ns->netstack_numzones > 0); 435 if (ns->netstack_numzones != 1) { 436 /* Stack instance being used by other zone */ 437 mutex_exit(&ns->netstack_lock); 438 ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID); 439 return; 440 } 441 mutex_exit(&ns->netstack_lock); 442 443 mutex_enter(&netstack_g_lock); 444 mutex_enter(&ns->netstack_lock); 445 /* 446 * Mark this netstack as having a SHUTDOWN running so 447 * any netstack_register/netstack_unregister waits for 448 * the existing create callbacks to complete in moduleid order 449 */ 450 ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS)); 451 ns->netstack_flags |= NSF_ZONE_SHUTDOWN; 452 453 /* 454 * Determine the set of stacks that exist before we drop the lock. 455 * Set NSS_SHUTDOWN_NEEDED for each of those. 456 */ 457 for (i = 0; i < NS_MAX; i++) { 458 nm_state_t *nms = &ns->netstack_m_state[i]; 459 460 if ((ns_reg[i].nr_flags & NRF_REGISTERED) && 461 ns_reg[i].nr_shutdown != NULL && 462 (nms->nms_flags & NSS_CREATE_COMPLETED) && 463 (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) { 464 nms->nms_flags |= NSS_SHUTDOWN_NEEDED; 465 DTRACE_PROBE2(netstack__shutdown__needed, 466 netstack_t *, ns, int, i); 467 } 468 } 469 mutex_exit(&ns->netstack_lock); 470 mutex_exit(&netstack_g_lock); 471 472 /* 473 * Call the shutdown function for all registered modules for this 474 * netstack. 475 */ 476 apply_all_modules_reverse(ns, netstack_apply_shutdown); 477 478 /* Tell any waiting netstack_register/netstack_unregister to proceed */ 479 mutex_enter(&ns->netstack_lock); 480 ASSERT(ns->netstack_flags & NSF_ZONE_SHUTDOWN); 481 ns->netstack_flags &= ~NSF_ZONE_SHUTDOWN; 482 cv_broadcast(&ns->netstack_cv); 483 mutex_exit(&ns->netstack_lock); 484 } 485 486 /* 487 * Common routine to release a zone. 488 * If this was the last zone using the stack instance then prepare to 489 * have the refcnt dropping to zero free the zone. 
490 */ 491 /* ARGSUSED */ 492 static void 493 netstack_zone_destroy(zoneid_t zoneid, void *arg) 494 { 495 netstack_t *ns = (netstack_t *)arg; 496 497 ASSERT(arg != NULL); 498 499 mutex_enter(&ns->netstack_lock); 500 ASSERT(ns->netstack_numzones > 0); 501 ns->netstack_numzones--; 502 if (ns->netstack_numzones != 0) { 503 /* Stack instance being used by other zone */ 504 mutex_exit(&ns->netstack_lock); 505 ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID); 506 /* Record that we a shared stack zone has gone away */ 507 netstack_shared_zone_remove(zoneid); 508 return; 509 } 510 /* 511 * Set CLOSING so that netstack_find_by will not find it. 512 */ 513 ns->netstack_flags |= NSF_CLOSING; 514 mutex_exit(&ns->netstack_lock); 515 DTRACE_PROBE1(netstack__dec__numzones, netstack_t *, ns); 516 /* No other thread can call zone_destroy for this stack */ 517 518 /* 519 * Decrease refcnt to account for the one in netstack_zone_init() 520 */ 521 netstack_rele(ns); 522 } 523 524 /* 525 * Called when the reference count drops to zero. 526 * Call the destroy functions for each registered module. 527 */ 528 static void 529 netstack_stack_inactive(netstack_t *ns) 530 { 531 int i; 532 533 mutex_enter(&netstack_g_lock); 534 mutex_enter(&ns->netstack_lock); 535 /* 536 * Mark this netstack as having a DESTROY running so 537 * any netstack_register/netstack_unregister waits for 538 * the existing destroy callbacks to complete in reverse moduleid order 539 */ 540 ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS)); 541 ns->netstack_flags |= NSF_ZONE_DESTROY; 542 /* 543 * If the shutdown callback wasn't called earlier (e.g., if this is 544 * a netstack shared between multiple zones), then we schedule it now. 545 * 546 * Determine the set of stacks that exist before we drop the lock. 547 * Set NSS_DESTROY_NEEDED for each of those. That 548 * ensures that when we return all the callbacks for existing 549 * instances have completed. 550 */ 551 for (i = 0; i < NS_MAX; i++) { 552 nm_state_t *nms = &ns->netstack_m_state[i]; 553 554 if ((ns_reg[i].nr_flags & NRF_REGISTERED) && 555 ns_reg[i].nr_shutdown != NULL && 556 (nms->nms_flags & NSS_CREATE_COMPLETED) && 557 (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) { 558 nms->nms_flags |= NSS_SHUTDOWN_NEEDED; 559 DTRACE_PROBE2(netstack__shutdown__needed, 560 netstack_t *, ns, int, i); 561 } 562 563 if ((ns_reg[i].nr_flags & NRF_REGISTERED) && 564 ns_reg[i].nr_destroy != NULL && 565 (nms->nms_flags & NSS_CREATE_COMPLETED) && 566 (nms->nms_flags & NSS_DESTROY_ALL) == 0) { 567 nms->nms_flags |= NSS_DESTROY_NEEDED; 568 DTRACE_PROBE2(netstack__destroy__needed, 569 netstack_t *, ns, int, i); 570 } 571 } 572 mutex_exit(&ns->netstack_lock); 573 mutex_exit(&netstack_g_lock); 574 575 /* 576 * Call the shutdown and destroy functions for all registered modules 577 * for this netstack. 578 * 579 * Since there are some ordering dependencies between the modules we 580 * tear them down in the reverse order of what was used to create them. 581 * 582 * Since a netstack_t is never reused (when a zone is rebooted it gets 583 * a new zoneid == netstackid i.e. a new netstack_t is allocated) we 584 * leave nms_flags the way it is i.e. with NSS_DESTROY_COMPLETED set. 585 * That is different than in the netstack_unregister() case. 
586 */ 587 apply_all_modules_reverse(ns, netstack_apply_shutdown); 588 apply_all_modules_reverse(ns, netstack_apply_destroy); 589 590 /* Tell any waiting netstack_register/netstack_unregister to proceed */ 591 mutex_enter(&ns->netstack_lock); 592 ASSERT(ns->netstack_flags & NSF_ZONE_DESTROY); 593 ns->netstack_flags &= ~NSF_ZONE_DESTROY; 594 cv_broadcast(&ns->netstack_cv); 595 mutex_exit(&ns->netstack_lock); 596 } 597 598 /* 599 * Apply a function to all netstacks for a particular moduleid. 600 * 601 * If there is any zone activity (due to a zone being created, shutdown, 602 * or destroyed) we wait for that to complete before we proceed. This ensures 603 * that the moduleids are processed in order when a zone is created or 604 * destroyed. 605 * 606 * The applyfn has to drop netstack_g_lock if it does some work. 607 * In that case we don't follow netstack_next, 608 * even if it is possible to do so without any hazards. This is 609 * because we want the design to allow for the list of netstacks threaded 610 * by netstack_next to change in any arbitrary way during the time the 611 * lock was dropped. 612 * 613 * It is safe to restart the loop at netstack_head since the applyfn 614 * changes netstack_m_state as it processes things, so a subsequent 615 * pass through will have no effect in applyfn, hence the loop will terminate 616 * in at worst O(N^2). 617 */ 618 static void 619 apply_all_netstacks(int moduleid, applyfn_t *applyfn) 620 { 621 netstack_t *ns; 622 623 mutex_enter(&netstack_g_lock); 624 ns = netstack_head; 625 while (ns != NULL) { 626 if (wait_for_zone_creator(ns, &netstack_g_lock)) { 627 /* Lock dropped - restart at head */ 628 ns = netstack_head; 629 } else if ((applyfn)(&netstack_g_lock, ns, moduleid)) { 630 /* Lock dropped - restart at head */ 631 ns = netstack_head; 632 } else { 633 ns = ns->netstack_next; 634 } 635 } 636 mutex_exit(&netstack_g_lock); 637 } 638 639 /* 640 * Apply a function to all moduleids for a particular netstack. 641 * 642 * Since the netstack linkage doesn't matter in this case we can 643 * ignore whether the function drops the lock. 644 */ 645 static void 646 apply_all_modules(netstack_t *ns, applyfn_t *applyfn) 647 { 648 int i; 649 650 mutex_enter(&netstack_g_lock); 651 for (i = 0; i < NS_MAX; i++) { 652 /* 653 * We don't care whether the lock was dropped 654 * since we are not iterating over netstack_head. 655 */ 656 (void) (applyfn)(&netstack_g_lock, ns, i); 657 } 658 mutex_exit(&netstack_g_lock); 659 } 660 661 /* Like the above but in reverse moduleid order */ 662 static void 663 apply_all_modules_reverse(netstack_t *ns, applyfn_t *applyfn) 664 { 665 int i; 666 667 mutex_enter(&netstack_g_lock); 668 for (i = NS_MAX-1; i >= 0; i--) { 669 /* 670 * We don't care whether the lock was dropped 671 * since we are not iterating over netstack_head. 672 */ 673 (void) (applyfn)(&netstack_g_lock, ns, i); 674 } 675 mutex_exit(&netstack_g_lock); 676 } 677 678 /* 679 * Call the create function for the ns and moduleid if CREATE_NEEDED 680 * is set. 681 * If some other thread gets here first and sets *_INPROGRESS, then 682 * we wait for that thread to complete so that we can ensure that 683 * all the callbacks are done when we've looped over all netstacks/moduleids. 684 * 685 * When we call the create function, we temporarily drop the netstack_lock 686 * held by the caller, and return true to tell the caller it needs to 687 * re-evalute the state. 
688 */ 689 static boolean_t 690 netstack_apply_create(kmutex_t *lockp, netstack_t *ns, int moduleid) 691 { 692 void *result; 693 netstackid_t stackid; 694 nm_state_t *nms = &ns->netstack_m_state[moduleid]; 695 boolean_t dropped = B_FALSE; 696 697 ASSERT(MUTEX_HELD(lockp)); 698 mutex_enter(&ns->netstack_lock); 699 700 if (wait_for_nms_inprogress(ns, nms, lockp)) 701 dropped = B_TRUE; 702 703 if (nms->nms_flags & NSS_CREATE_NEEDED) { 704 nms->nms_flags &= ~NSS_CREATE_NEEDED; 705 nms->nms_flags |= NSS_CREATE_INPROGRESS; 706 DTRACE_PROBE2(netstack__create__inprogress, 707 netstack_t *, ns, int, moduleid); 708 mutex_exit(&ns->netstack_lock); 709 mutex_exit(lockp); 710 dropped = B_TRUE; 711 712 ASSERT(ns_reg[moduleid].nr_create != NULL); 713 stackid = ns->netstack_stackid; 714 DTRACE_PROBE2(netstack__create__start, 715 netstackid_t, stackid, 716 netstack_t *, ns); 717 result = (ns_reg[moduleid].nr_create)(stackid, ns); 718 DTRACE_PROBE2(netstack__create__end, 719 void *, result, netstack_t *, ns); 720 721 ASSERT(result != NULL); 722 mutex_enter(lockp); 723 mutex_enter(&ns->netstack_lock); 724 ns->netstack_modules[moduleid] = result; 725 nms->nms_flags &= ~NSS_CREATE_INPROGRESS; 726 nms->nms_flags |= NSS_CREATE_COMPLETED; 727 cv_broadcast(&nms->nms_cv); 728 DTRACE_PROBE2(netstack__create__completed, 729 netstack_t *, ns, int, moduleid); 730 mutex_exit(&ns->netstack_lock); 731 return (dropped); 732 } else { 733 mutex_exit(&ns->netstack_lock); 734 return (dropped); 735 } 736 } 737 738 /* 739 * Call the shutdown function for the ns and moduleid if SHUTDOWN_NEEDED 740 * is set. 741 * If some other thread gets here first and sets *_INPROGRESS, then 742 * we wait for that thread to complete so that we can ensure that 743 * all the callbacks are done when we've looped over all netstacks/moduleids. 744 * 745 * When we call the shutdown function, we temporarily drop the netstack_lock 746 * held by the caller, and return true to tell the caller it needs to 747 * re-evalute the state. 
748 */ 749 static boolean_t 750 netstack_apply_shutdown(kmutex_t *lockp, netstack_t *ns, int moduleid) 751 { 752 netstackid_t stackid; 753 void * netstack_module; 754 nm_state_t *nms = &ns->netstack_m_state[moduleid]; 755 boolean_t dropped = B_FALSE; 756 757 ASSERT(MUTEX_HELD(lockp)); 758 mutex_enter(&ns->netstack_lock); 759 760 if (wait_for_nms_inprogress(ns, nms, lockp)) 761 dropped = B_TRUE; 762 763 if (nms->nms_flags & NSS_SHUTDOWN_NEEDED) { 764 nms->nms_flags &= ~NSS_SHUTDOWN_NEEDED; 765 nms->nms_flags |= NSS_SHUTDOWN_INPROGRESS; 766 DTRACE_PROBE2(netstack__shutdown__inprogress, 767 netstack_t *, ns, int, moduleid); 768 mutex_exit(&ns->netstack_lock); 769 mutex_exit(lockp); 770 dropped = B_TRUE; 771 772 ASSERT(ns_reg[moduleid].nr_shutdown != NULL); 773 stackid = ns->netstack_stackid; 774 netstack_module = ns->netstack_modules[moduleid]; 775 DTRACE_PROBE2(netstack__shutdown__start, 776 netstackid_t, stackid, 777 void *, netstack_module); 778 (ns_reg[moduleid].nr_shutdown)(stackid, netstack_module); 779 DTRACE_PROBE1(netstack__shutdown__end, 780 netstack_t *, ns); 781 782 mutex_enter(lockp); 783 mutex_enter(&ns->netstack_lock); 784 nms->nms_flags &= ~NSS_SHUTDOWN_INPROGRESS; 785 nms->nms_flags |= NSS_SHUTDOWN_COMPLETED; 786 cv_broadcast(&nms->nms_cv); 787 DTRACE_PROBE2(netstack__shutdown__completed, 788 netstack_t *, ns, int, moduleid); 789 mutex_exit(&ns->netstack_lock); 790 return (dropped); 791 } else { 792 mutex_exit(&ns->netstack_lock); 793 return (dropped); 794 } 795 } 796 797 /* 798 * Call the destroy function for the ns and moduleid if DESTROY_NEEDED 799 * is set. 800 * If some other thread gets here first and sets *_INPROGRESS, then 801 * we wait for that thread to complete so that we can ensure that 802 * all the callbacks are done when we've looped over all netstacks/moduleids. 803 * 804 * When we call the destroy function, we temporarily drop the netstack_lock 805 * held by the caller, and return true to tell the caller it needs to 806 * re-evalute the state. 
807 */ 808 static boolean_t 809 netstack_apply_destroy(kmutex_t *lockp, netstack_t *ns, int moduleid) 810 { 811 netstackid_t stackid; 812 void * netstack_module; 813 nm_state_t *nms = &ns->netstack_m_state[moduleid]; 814 boolean_t dropped = B_FALSE; 815 816 ASSERT(MUTEX_HELD(lockp)); 817 mutex_enter(&ns->netstack_lock); 818 819 if (wait_for_nms_inprogress(ns, nms, lockp)) 820 dropped = B_TRUE; 821 822 if (nms->nms_flags & NSS_DESTROY_NEEDED) { 823 nms->nms_flags &= ~NSS_DESTROY_NEEDED; 824 nms->nms_flags |= NSS_DESTROY_INPROGRESS; 825 DTRACE_PROBE2(netstack__destroy__inprogress, 826 netstack_t *, ns, int, moduleid); 827 mutex_exit(&ns->netstack_lock); 828 mutex_exit(lockp); 829 dropped = B_TRUE; 830 831 ASSERT(ns_reg[moduleid].nr_destroy != NULL); 832 stackid = ns->netstack_stackid; 833 netstack_module = ns->netstack_modules[moduleid]; 834 DTRACE_PROBE2(netstack__destroy__start, 835 netstackid_t, stackid, 836 void *, netstack_module); 837 (ns_reg[moduleid].nr_destroy)(stackid, netstack_module); 838 DTRACE_PROBE1(netstack__destroy__end, 839 netstack_t *, ns); 840 841 mutex_enter(lockp); 842 mutex_enter(&ns->netstack_lock); 843 ns->netstack_modules[moduleid] = NULL; 844 nms->nms_flags &= ~NSS_DESTROY_INPROGRESS; 845 nms->nms_flags |= NSS_DESTROY_COMPLETED; 846 cv_broadcast(&nms->nms_cv); 847 DTRACE_PROBE2(netstack__destroy__completed, 848 netstack_t *, ns, int, moduleid); 849 mutex_exit(&ns->netstack_lock); 850 return (dropped); 851 } else { 852 mutex_exit(&ns->netstack_lock); 853 return (dropped); 854 } 855 } 856 857 /* 858 * If somebody is creating the netstack (due to a new zone being created) 859 * then we wait for them to complete. This ensures that any additional 860 * netstack_register() doesn't cause the create functions to run out of 861 * order. 862 * Note that we do not need such a global wait in the case of the shutdown 863 * and destroy callbacks, since in that case it is sufficient for both 864 * threads to set NEEDED and wait for INPROGRESS to ensure ordering. 865 * Returns true if lockp was temporarily dropped while waiting. 866 */ 867 static boolean_t 868 wait_for_zone_creator(netstack_t *ns, kmutex_t *lockp) 869 { 870 boolean_t dropped = B_FALSE; 871 872 mutex_enter(&ns->netstack_lock); 873 while (ns->netstack_flags & NSF_ZONE_CREATE) { 874 DTRACE_PROBE1(netstack__wait__zone__inprogress, 875 netstack_t *, ns); 876 if (lockp != NULL) { 877 dropped = B_TRUE; 878 mutex_exit(lockp); 879 } 880 cv_wait(&ns->netstack_cv, &ns->netstack_lock); 881 if (lockp != NULL) { 882 /* First drop netstack_lock to preserve order */ 883 mutex_exit(&ns->netstack_lock); 884 mutex_enter(lockp); 885 mutex_enter(&ns->netstack_lock); 886 } 887 } 888 mutex_exit(&ns->netstack_lock); 889 return (dropped); 890 } 891 892 /* 893 * Wait for any INPROGRESS flag to be cleared for the netstack/moduleid 894 * combination. 895 * Returns true if lockp was temporarily dropped while waiting. 
896 */ 897 static boolean_t 898 wait_for_nms_inprogress(netstack_t *ns, nm_state_t *nms, kmutex_t *lockp) 899 { 900 boolean_t dropped = B_FALSE; 901 902 while (nms->nms_flags & NSS_ALL_INPROGRESS) { 903 DTRACE_PROBE2(netstack__wait__nms__inprogress, 904 netstack_t *, ns, nm_state_t *, nms); 905 if (lockp != NULL) { 906 dropped = B_TRUE; 907 mutex_exit(lockp); 908 } 909 cv_wait(&nms->nms_cv, &ns->netstack_lock); 910 if (lockp != NULL) { 911 /* First drop netstack_lock to preserve order */ 912 mutex_exit(&ns->netstack_lock); 913 mutex_enter(lockp); 914 mutex_enter(&ns->netstack_lock); 915 } 916 } 917 return (dropped); 918 } 919 920 /* 921 * Get the stack instance used in caller's zone. 922 * Increases the reference count, caller must do a netstack_rele. 923 * It can't be called after zone_destroy() has started. 924 */ 925 netstack_t * 926 netstack_get_current(void) 927 { 928 netstack_t *ns; 929 930 ns = curproc->p_zone->zone_netstack; 931 ASSERT(ns != NULL); 932 return (netstack_hold_if_active(ns)); 933 } 934 935 /* 936 * Find a stack instance given the cred. 937 * This is used by the modules to potentially allow for a future when 938 * something other than the zoneid is used to determine the stack. 939 */ 940 netstack_t * 941 netstack_find_by_cred(const cred_t *cr) 942 { 943 zoneid_t zoneid = crgetzoneid(cr); 944 945 /* Handle the case when cr_zone is NULL */ 946 if (zoneid == (zoneid_t)-1) 947 zoneid = GLOBAL_ZONEID; 948 949 /* For performance ... */ 950 if (curproc->p_zone->zone_id == zoneid) 951 return (netstack_get_current()); 952 else 953 return (netstack_find_by_zoneid(zoneid)); 954 } 955 956 /* 957 * Find a stack instance given the zoneid. 958 * Increases the reference count if found; caller must do a 959 * netstack_rele(). 960 * 961 * If there is no exact match then assume the shared stack instance 962 * matches. 963 * 964 * Skip the uninitialized and closing ones. 965 */ 966 netstack_t * 967 netstack_find_by_zoneid(zoneid_t zoneid) 968 { 969 netstack_t *ns; 970 zone_t *zone; 971 972 zone = zone_find_by_id(zoneid); 973 974 if (zone == NULL) 975 return (NULL); 976 977 ASSERT(zone->zone_netstack != NULL); 978 ns = netstack_hold_if_active(zone->zone_netstack); 979 980 zone_rele(zone); 981 return (ns); 982 } 983 984 /* 985 * Find a stack instance given the zoneid. Can only be called from 986 * the create callback. See the comments in zone_find_by_id_nolock why 987 * that limitation exists. 988 * 989 * Increases the reference count if found; caller must do a 990 * netstack_rele(). 991 * 992 * If there is no exact match then assume the shared stack instance 993 * matches. 994 * 995 * Skip the unitialized ones. 996 */ 997 netstack_t * 998 netstack_find_by_zoneid_nolock(zoneid_t zoneid) 999 { 1000 zone_t *zone; 1001 1002 zone = zone_find_by_id_nolock(zoneid); 1003 1004 if (zone == NULL) 1005 return (NULL); 1006 1007 ASSERT(zone->zone_netstack != NULL); 1008 /* zone_find_by_id_nolock does not have a hold on the zone */ 1009 return (netstack_hold_if_active(zone->zone_netstack)); 1010 } 1011 1012 /* 1013 * Find a stack instance given the stackid with exact match? 1014 * Increases the reference count if found; caller must do a 1015 * netstack_rele(). 1016 * 1017 * Skip the unitialized ones. 1018 */ 1019 netstack_t * 1020 netstack_find_by_stackid(netstackid_t stackid) 1021 { 1022 netstack_t *ns; 1023 1024 mutex_enter(&netstack_g_lock); 1025 for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) { 1026 /* Can't use hold_if_active because of stackid check. 

/*
 * Find a stack instance given the stackid (exact match only).
 * Increases the reference count if found; caller must do a
 * netstack_rele().
 *
 * Skip the uninitialized ones.
 */
netstack_t *
netstack_find_by_stackid(netstackid_t stackid)
{
	netstack_t *ns;

	mutex_enter(&netstack_g_lock);
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		/* Can't use hold_if_active because of stackid check. */
		mutex_enter(&ns->netstack_lock);
		if (ns->netstack_stackid == stackid &&
		    !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
			netstack_hold_locked(ns);
			mutex_exit(&ns->netstack_lock);
			mutex_exit(&netstack_g_lock);
			return (ns);
		}
		mutex_exit(&ns->netstack_lock);
	}
	mutex_exit(&netstack_g_lock);
	return (NULL);
}

boolean_t
netstack_inuse_by_stackid(netstackid_t stackid)
{
	netstack_t *ns;
	boolean_t rval = B_FALSE;

	mutex_enter(&netstack_g_lock);

	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		if (ns->netstack_stackid == stackid) {
			rval = B_TRUE;
			break;
		}
	}

	mutex_exit(&netstack_g_lock);

	return (rval);
}

static void
netstack_reap(void *arg)
{
	netstack_t **nsp, *ns = (netstack_t *)arg;
	boolean_t found;
	int i;

	/*
	 * Time to call the destroy functions and free up
	 * the structure
	 */
	netstack_stack_inactive(ns);

	/* Make sure nothing increased the references */
	ASSERT(ns->netstack_refcnt == 0);
	ASSERT(ns->netstack_numzones == 0);

	/* Finally remove from list of netstacks */
	mutex_enter(&netstack_g_lock);
	found = B_FALSE;
	for (nsp = &netstack_head; *nsp != NULL;
	    nsp = &(*nsp)->netstack_next) {
		if (*nsp == ns) {
			*nsp = ns->netstack_next;
			ns->netstack_next = NULL;
			found = B_TRUE;
			break;
		}
	}
	ASSERT(found);
	mutex_exit(&netstack_g_lock);

	/* Make sure nothing increased the references */
	ASSERT(ns->netstack_refcnt == 0);
	ASSERT(ns->netstack_numzones == 0);

	ASSERT(ns->netstack_flags & NSF_CLOSING);

	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		cv_destroy(&nms->nms_cv);
	}
	mutex_destroy(&ns->netstack_lock);
	cv_destroy(&ns->netstack_cv);
	kmem_free(ns, sizeof (*ns));
	/* Allow another reap to be scheduled. */
	sema_v(&netstack_reap_limiter);
}

void
netstack_rele(netstack_t *ns)
{
	int refcnt, numzones;

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_refcnt > 0);
	ns->netstack_refcnt--;
	/*
	 * As we drop the lock additional netstack_rele()s can come in
	 * and decrement the refcnt to zero and free the netstack_t.
	 * Store the counts in local variables and, if we were not the
	 * last, don't reference the netstack_t after that.
	 */
	refcnt = ns->netstack_refcnt;
	numzones = ns->netstack_numzones;
	DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
	mutex_exit(&ns->netstack_lock);

	if (refcnt == 0 && numzones == 0) {
		/*
		 * Because callers can re-enter various netstack structures
		 * in ways that risk a lock-up through odd reference models,
		 * or other factors, we schedule the actual deletion of this
		 * netstack as a deferred task on the system taskq.  This
		 * way, any such reference models won't trip over
		 * themselves.
		 *
		 * Assume we aren't in a high-priority interrupt context, so
		 * we can use KM_SLEEP and semaphores.
		 */
		if (sema_tryp(&netstack_reap_limiter) == 0) {
			/*
			 * Indicate we're slamming against a limit.
			 */
			hrtime_t measurement = gethrtime();

			sema_p(&netstack_reap_limiter);
			/* Capture delay in nanoseconds. */
			DTRACE_PROBE1(netstack__reap__rate__limited,
			    hrtime_t, gethrtime() - measurement);
		}

		/* TQ_SLEEP should prevent taskq_dispatch() from failing. */
		(void) taskq_dispatch(system_taskq, netstack_reap, ns,
		    TQ_SLEEP);
	}
}
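
/*
 * Sketch of handing a held netstack to another context (foo_task() is
 * hypothetical): take an additional hold before dispatching, and have
 * the asynchronous consumer drop it when finished:
 *
 *	netstack_hold(ns);
 *	(void) taskq_dispatch(system_taskq, foo_task, ns, TQ_SLEEP);
 *
 *	and in foo_task():
 *		... use the netstack_t ...
 *		netstack_rele(ns);
 */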

static void
netstack_hold_locked(netstack_t *ns)
{
	ASSERT(MUTEX_HELD(&ns->netstack_lock));
	ns->netstack_refcnt++;
	ASSERT(ns->netstack_refcnt > 0);
	DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
}

/*
 * If the passed-in netstack isn't active (i.e. it's uninitialized or
 * closing), return NULL, otherwise return it with its reference held.
 * Common code for many netstack_find*() functions.
 */
netstack_t *
netstack_hold_if_active(netstack_t *ns)
{
	netstack_t *retval;

	mutex_enter(&ns->netstack_lock);
	if (ns->netstack_flags & (NSF_UNINIT | NSF_CLOSING)) {
		retval = NULL;
	} else {
		netstack_hold_locked(ns);
		retval = ns;
	}
	mutex_exit(&ns->netstack_lock);

	return (retval);
}

void
netstack_hold(netstack_t *ns)
{
	mutex_enter(&ns->netstack_lock);
	netstack_hold_locked(ns);
	mutex_exit(&ns->netstack_lock);
}

/*
 * To support kstat_create_netstack() using kstat_zone_add we need
 * to track both
 * - all zoneids that use the global/shared stack
 * - all kstats that have been added for the shared stack
 */
kstat_t *
kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
    char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
    netstackid_t ks_netstackid)
{
	kstat_t *ks;

	if (ks_netstackid == GLOBAL_NETSTACKID) {
		ks = kstat_create_zone(ks_module, ks_instance, ks_name,
		    ks_class, ks_type, ks_ndata, ks_flags, GLOBAL_ZONEID);
		if (ks != NULL)
			netstack_shared_kstat_add(ks);
		return (ks);
	} else {
		zoneid_t zoneid = ks_netstackid;

		return (kstat_create_zone(ks_module, ks_instance, ks_name,
		    ks_class, ks_type, ks_ndata, ks_flags, zoneid));
	}
}

void
kstat_delete_netstack(kstat_t *ks, netstackid_t ks_netstackid)
{
	if (ks_netstackid == GLOBAL_NETSTACKID) {
		netstack_shared_kstat_remove(ks);
	}
	kstat_delete(ks);
}
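
/*
 * Illustrative only ("foo" and the kstat shape are made up): a kstat
 * created with a stack's netstackid appears in the owning zone, while a
 * GLOBAL_NETSTACKID kstat is additionally made visible to every zone
 * sharing the global stack:
 *
 *	kstat_t *ks;
 *
 *	ks = kstat_create_netstack("foo", 0, "foostat", "net",
 *	    KSTAT_TYPE_NAMED, 4, 0, stackid);
 *	if (ks != NULL) {
 *		... initialize the named kstats ...
 *		kstat_install(ks);
 *	}
 *	...
 *	kstat_delete_netstack(ks, stackid);
 */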
1253 */ 1254 for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) { 1255 kstat_zone_add(sk->sk_kstat, zoneid); 1256 } 1257 mutex_exit(&netstack_shared_lock); 1258 } 1259 1260 static void 1261 netstack_shared_zone_remove(zoneid_t zoneid) 1262 { 1263 struct shared_zone_list **szp, *sz; 1264 struct shared_kstat_list *sk; 1265 1266 /* Find in list */ 1267 mutex_enter(&netstack_shared_lock); 1268 sz = NULL; 1269 for (szp = &netstack_shared_zones; *szp != NULL; 1270 szp = &((*szp)->sz_next)) { 1271 if ((*szp)->sz_zoneid == zoneid) { 1272 sz = *szp; 1273 break; 1274 } 1275 } 1276 /* We must find it */ 1277 ASSERT(sz != NULL); 1278 *szp = sz->sz_next; 1279 sz->sz_next = NULL; 1280 1281 /* 1282 * Perform kstat_zone_remove for each existing shared stack kstat. 1283 * Note: Holds netstack_shared_lock lock across kstat_zone_remove. 1284 */ 1285 for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) { 1286 kstat_zone_remove(sk->sk_kstat, zoneid); 1287 } 1288 mutex_exit(&netstack_shared_lock); 1289 1290 kmem_free(sz, sizeof (*sz)); 1291 } 1292 1293 static void 1294 netstack_shared_kstat_add(kstat_t *ks) 1295 { 1296 struct shared_zone_list *sz; 1297 struct shared_kstat_list *sk; 1298 1299 sk = (struct shared_kstat_list *)kmem_zalloc(sizeof (*sk), KM_SLEEP); 1300 sk->sk_kstat = ks; 1301 1302 /* Insert in list */ 1303 mutex_enter(&netstack_shared_lock); 1304 sk->sk_next = netstack_shared_kstats; 1305 netstack_shared_kstats = sk; 1306 1307 /* 1308 * Perform kstat_zone_add for each existing shared stack zone. 1309 * Note: Holds netstack_shared_lock lock across kstat_zone_add. 1310 */ 1311 for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) { 1312 kstat_zone_add(ks, sz->sz_zoneid); 1313 } 1314 mutex_exit(&netstack_shared_lock); 1315 } 1316 1317 static void 1318 netstack_shared_kstat_remove(kstat_t *ks) 1319 { 1320 struct shared_zone_list *sz; 1321 struct shared_kstat_list **skp, *sk; 1322 1323 /* Find in list */ 1324 mutex_enter(&netstack_shared_lock); 1325 sk = NULL; 1326 for (skp = &netstack_shared_kstats; *skp != NULL; 1327 skp = &((*skp)->sk_next)) { 1328 if ((*skp)->sk_kstat == ks) { 1329 sk = *skp; 1330 break; 1331 } 1332 } 1333 /* Must find it */ 1334 ASSERT(sk != NULL); 1335 *skp = sk->sk_next; 1336 sk->sk_next = NULL; 1337 1338 /* 1339 * Perform kstat_zone_remove for each existing shared stack kstat. 1340 * Note: Holds netstack_shared_lock lock across kstat_zone_remove. 1341 */ 1342 for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) { 1343 kstat_zone_remove(ks, sz->sz_zoneid); 1344 } 1345 mutex_exit(&netstack_shared_lock); 1346 kmem_free(sk, sizeof (*sk)); 1347 } 1348 1349 /* 1350 * If a zoneid is part of the shared zone, return true 1351 */ 1352 static boolean_t 1353 netstack_find_shared_zoneid(zoneid_t zoneid) 1354 { 1355 struct shared_zone_list *sz; 1356 1357 mutex_enter(&netstack_shared_lock); 1358 for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) { 1359 if (sz->sz_zoneid == zoneid) { 1360 mutex_exit(&netstack_shared_lock); 1361 return (B_TRUE); 1362 } 1363 } 1364 mutex_exit(&netstack_shared_lock); 1365 return (B_FALSE); 1366 } 1367 1368 /* 1369 * Hide the fact that zoneids and netstackids are allocated from 1370 * the same space in the current implementation. 1371 * We currently do not check that the stackid/zoneids are valid, since there 1372 * is no need for that. But this should only be done for ids that are 1373 * valid. 
1374 */ 1375 zoneid_t 1376 netstackid_to_zoneid(netstackid_t stackid) 1377 { 1378 return (stackid); 1379 } 1380 1381 netstackid_t 1382 zoneid_to_netstackid(zoneid_t zoneid) 1383 { 1384 if (netstack_find_shared_zoneid(zoneid)) 1385 return (GLOBAL_ZONEID); 1386 else 1387 return (zoneid); 1388 } 1389 1390 zoneid_t 1391 netstack_get_zoneid(netstack_t *ns) 1392 { 1393 return (netstackid_to_zoneid(ns->netstack_stackid)); 1394 } 1395 1396 /* 1397 * Simplistic support for walking all the handles. 1398 * Example usage: 1399 * netstack_handle_t nh; 1400 * netstack_t *ns; 1401 * 1402 * netstack_next_init(&nh); 1403 * while ((ns = netstack_next(&nh)) != NULL) { 1404 * do something; 1405 * netstack_rele(ns); 1406 * } 1407 * netstack_next_fini(&nh); 1408 */ 1409 void 1410 netstack_next_init(netstack_handle_t *handle) 1411 { 1412 *handle = 0; 1413 } 1414 1415 /* ARGSUSED */ 1416 void 1417 netstack_next_fini(netstack_handle_t *handle) 1418 { 1419 } 1420 1421 netstack_t * 1422 netstack_next(netstack_handle_t *handle) 1423 { 1424 netstack_t *ns; 1425 int i, end; 1426 1427 end = *handle; 1428 /* Walk skipping *handle number of instances */ 1429 1430 /* Look if there is a matching stack instance */ 1431 mutex_enter(&netstack_g_lock); 1432 ns = netstack_head; 1433 for (i = 0; i < end; i++) { 1434 if (ns == NULL) 1435 break; 1436 ns = ns->netstack_next; 1437 } 1438 /* 1439 * Skip those that aren't really here (uninitialized or closing). 1440 * Can't use hold_if_active because of "end" tracking. 1441 */ 1442 while (ns != NULL) { 1443 mutex_enter(&ns->netstack_lock); 1444 if ((ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) == 0) { 1445 *handle = end + 1; 1446 netstack_hold_locked(ns); 1447 mutex_exit(&ns->netstack_lock); 1448 break; 1449 } 1450 mutex_exit(&ns->netstack_lock); 1451 end++; 1452 ns = ns->netstack_next; 1453 } 1454 mutex_exit(&netstack_g_lock); 1455 return (ns); 1456 } 1457