/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 * Copyright (c) 2016 by Delphix. All rights reserved.
 */

/*
 * Copyright (c) 2016, Joyent, Inc. All rights reserved.
 */

/*
 * The ipnet device defined here provides access to packets at the IP layer. To
 * provide access to packets at this layer it registers a callback function in
 * the ip module and when there are open instances of the device ip will pass
 * packets into the device. Packets from ip are passed on the input, output and
 * loopback paths. Internally the module returns to ip as soon as possible by
 * deferring processing using a taskq.
 *
 * Management of the devices in /dev/ipnet/ is handled by the devname
 * filesystem and use of the neti interfaces. This module registers for NIC
 * events using the neti framework so that when IP interfaces are brought up,
 * taken down etc. the ipnet module is notified and its view of the interfaces
 * configured on the system adjusted.
 * On attach, the module gets an initial
 * view of the system again using the neti framework but as it has already
 * registered for IP interface events, it is still up-to-date with any changes.
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/cred.h>
#include <sys/stat.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/dlpi.h>
#include <sys/strsun.h>
#include <sys/id_space.h>
#include <sys/kmem.h>
#include <sys/mkdev.h>
#include <sys/neti.h>
#include <net/if.h>
#include <sys/errno.h>
#include <sys/list.h>
#include <sys/ksynch.h>
#include <sys/hook_event.h>
#include <sys/sdt.h>
#include <sys/stropts.h>
#include <sys/sysmacros.h>
#include <inet/ip.h>
#include <inet/ip_if.h>
#include <inet/ip_multi.h>
#include <inet/ip6.h>
#include <inet/ipnet.h>
#include <net/bpf.h>
#include <net/bpfdesc.h>
#include <net/dlt.h>

/*
 * STREAMS module information.  The same structure is referenced by both
 * the read-side and the write-side queue initializers of this driver.
 */
static struct module_info ipnet_minfo = {
	1,		/* mi_idnum */
	"ipnet",	/* mi_idname */
	0,		/* mi_minpsz */
	INFPSZ,		/* mi_maxpsz */
	2048,		/* mi_hiwat */
	0		/* mi_lowat */
};

/*
 * List to hold static view of ipnetif_t's on the system. This is needed to
 * avoid holding the lock protecting the avl tree of ipnetif's over the
 * callback into the dev filesystem.
 */
typedef struct ipnetif_cbdata {
	char		ic_ifname[LIFNAMSIZ];	/* interface name */
	dev_t		ic_dev;			/* device number */
	list_node_t	ic_next;		/* list linkage */
} ipnetif_cbdata_t;

/*
 * Convenience enumerated type for ipnet_accept().  It describes the
 * properties of a given ipnet_addrp_t relative to a single ipnet_t
 * client stream.  The values represent whether the address is ...
 */
typedef enum {
	IPNETADDR_MYADDR,	/* an address on my ipnetif_t. */
	IPNETADDR_MBCAST,	/* a multicast or broadcast address. */
	IPNETADDR_UNKNOWN	/* none of the above. */
} ipnet_addrtype_t;

/* Argument used for the ipnet_nicevent_taskq callback.
*/
typedef struct ipnet_nicevent_s {
	nic_event_t		ipne_event;
	net_handle_t		ipne_protocol;
	netstackid_t		ipne_stackid;
	uint64_t		ipne_ifindex;
	uint64_t		ipne_lifindex;
	char			ipne_ifname[LIFNAMSIZ];
} ipnet_nicevent_t;

static dev_info_t	*ipnet_dip;
static major_t		ipnet_major;
static ddi_taskq_t	*ipnet_taskq;		/* taskq for packets */
static ddi_taskq_t	*ipnet_nicevent_taskq;	/* taskq for NIC events */
static id_space_t	*ipnet_minor_space;
static const int	IPNET_MINOR_LO = 1;	/* minor number for /dev/lo0 */
static const int	IPNET_MINOR_MIN = 2;	/* start of dynamic minors */
static dl_info_ack_t	ipnet_infoack = IPNET_INFO_ACK_INIT;
static ipnet_acceptfn_t	ipnet_accept, ipnet_loaccept;
static bpf_itap_fn_t	ipnet_itap;

static void	ipnet_input(mblk_t *);
static int	ipnet_wput(queue_t *, mblk_t *);
static int	ipnet_rsrv(queue_t *);
static int	ipnet_open(queue_t *, dev_t *, int, int, cred_t *);
static int	ipnet_close(queue_t *, int, cred_t *);
static void	ipnet_ioctl(queue_t *, mblk_t *);
static void	ipnet_iocdata(queue_t *, mblk_t *);
static void	ipnet_wputnondata(queue_t *, mblk_t *);
static int	ipnet_attach(dev_info_t *, ddi_attach_cmd_t);
static int	ipnet_detach(dev_info_t *, ddi_detach_cmd_t);
static int	ipnet_devinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static void	ipnet_inforeq(queue_t *q, mblk_t *mp);
static void	ipnet_bindreq(queue_t *q, mblk_t *mp);
static void	ipnet_unbindreq(queue_t *q, mblk_t *mp);
static void	ipnet_dlpromisconreq(queue_t *q, mblk_t *mp);
static void	ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp);
static int	ipnet_join_allmulti(ipnetif_t *, ipnet_stack_t *);
static void	ipnet_leave_allmulti(ipnetif_t *, ipnet_stack_t *);
static int	ipnet_nicevent_cb(hook_event_token_t, hook_data_t, void *);
static void	ipnet_nicevent_task(void *);
static ipnetif_t *ipnetif_create(const char *, uint64_t, ipnet_stack_t *,
    uint64_t);
static void	ipnetif_remove(ipnetif_t *, ipnet_stack_t *);
static ipnetif_addr_t *ipnet_match_lif(ipnetif_t *, lif_if_t, boolean_t);
static ipnetif_t *ipnetif_getby_index(uint64_t, ipnet_stack_t *);
static ipnetif_t *ipnetif_getby_dev(dev_t, ipnet_stack_t *);
static boolean_t ipnetif_in_zone(ipnetif_t *, zoneid_t, ipnet_stack_t *);
static void	ipnetif_zonecheck(ipnetif_t *, ipnet_stack_t *);
static int	ipnet_populate_if(net_handle_t, ipnet_stack_t *, boolean_t);
static int	ipnetif_compare_name(const void *, const void *);
static int	ipnetif_compare_name_zone(const void *, const void *);
static int	ipnetif_compare_index(const void *, const void *);
static void	ipnet_add_ifaddr(uint64_t, ipnetif_t *, net_handle_t);
static void	ipnet_delete_ifaddr(ipnetif_addr_t *, ipnetif_t *, boolean_t);
static void	ipnetif_refhold(ipnetif_t *);
static void	ipnetif_refrele(ipnetif_t *);
static void	ipnet_walkers_inc(ipnet_stack_t *);
static void	ipnet_walkers_dec(ipnet_stack_t *);
static void	ipnet_register_netihook(ipnet_stack_t *);
static void	*ipnet_stack_init(netstackid_t, netstack_t *);
static void	ipnet_stack_fini(netstackid_t, void *);
static void	ipnet_dispatch(void *);
static int	ipobs_bounce_func(hook_event_token_t, hook_data_t, void *);
static int	ipnet_bpf_bounce(hook_event_token_t, hook_data_t, void *);
static ipnetif_t *ipnetif_clone_create(ipnetif_t *, zoneid_t);
static void	ipnetif_clone_release(ipnetif_t *);

/*
 * Read-side queue: there is no put procedure; queued messages are handled
 * by the ipnet_rsrv() service procedure.  Open and close of the stream are
 * routed to ipnet_open() and ipnet_close().
 */
static struct qinit ipnet_rinit = {
	NULL,		/* qi_putp */
	ipnet_rsrv,	/* qi_srvp */
	ipnet_open,	/* qi_qopen */
	ipnet_close,	/* qi_qclose */
	NULL,		/* qi_qadmin */
	&ipnet_minfo,	/* qi_minfo */
};

/*
 * Write-side queue: every message is handled directly by ipnet_wput();
 * there is no write-side service procedure.
 */
static struct qinit ipnet_winit = {
	ipnet_wput,	/* qi_putp */
	NULL,		/* qi_srvp */
	NULL,		/* qi_qopen */
	NULL,		/* qi_qclose */
	NULL,		/* qi_qadmin */
	&ipnet_minfo,	/* qi_minfo */
};

/* Ties the read-side and write-side queue initializers together. */
static struct streamtab ipnet_info = {
	&ipnet_rinit, &ipnet_winit
};

DDI_DEFINE_STREAM_OPS(ipnet_ops, nulldev, nulldev, ipnet_attach,
    ipnet_detach, nodev, ipnet_devinfo, D_MP | D_MTPERMOD, &ipnet_info,
    ddi_quiesce_not_supported);

static struct modldrv modldrv = {
	&mod_driverops,
	"STREAMS ipnet driver",
	&ipnet_ops
};

static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};

/*
 * This structure contains the template data (names and type) that is
 * copied, in bulk, into the new kstats structure created by net_kstat_create.
 * No actual statistical information is stored in this instance of the
 * ipnet_kstats_t structure.
 */
static ipnet_kstats_t stats_template = {
	{ "duplicationFail",	KSTAT_DATA_UINT64 },
	{ "dispatchOk",		KSTAT_DATA_UINT64 },
	{ "dispatchFail",	KSTAT_DATA_UINT64 },
	{ "dispatchHeaderDrop",	KSTAT_DATA_UINT64 },
	{ "dispatchDupDrop",	KSTAT_DATA_UINT64 },
	{ "dispatchDeliver",	KSTAT_DATA_UINT64 },
	{ "acceptOk",		KSTAT_DATA_UINT64 },
	{ "acceptFail",		KSTAT_DATA_UINT64 }
};

/*
 * Walk the list of physical interfaces on the machine, for each
 * interface create a new ipnetif_t and add any addresses to it. We
 * need to do the walk twice, once for IPv4 and once for IPv6.
 *
 * The interfaces are destroyed as part of ipnet_stack_fini() for each
 * stack.  Note that we cannot do this initialization in
 * ipnet_stack_init(), since ipnet_stack_init() cannot fail.
239 */ 240 static int 241 ipnetif_init(void) 242 { 243 netstack_handle_t nh; 244 netstack_t *ns; 245 ipnet_stack_t *ips; 246 int ret = 0; 247 248 netstack_next_init(&nh); 249 while ((ns = netstack_next(&nh)) != NULL) { 250 ips = ns->netstack_ipnet; 251 if ((ret = ipnet_populate_if(ips->ips_ndv4, ips, B_FALSE)) == 0) 252 ret = ipnet_populate_if(ips->ips_ndv6, ips, B_TRUE); 253 netstack_rele(ns); 254 if (ret != 0) 255 break; 256 } 257 netstack_next_fini(&nh); 258 return (ret); 259 } 260 261 /* 262 * Standard module entry points. 263 */ 264 int 265 _init(void) 266 { 267 int ret; 268 boolean_t netstack_registered = B_FALSE; 269 270 if ((ipnet_major = ddi_name_to_major("ipnet")) == (major_t)-1) 271 return (ENODEV); 272 ipnet_minor_space = id_space_create("ipnet_minor_space", 273 IPNET_MINOR_MIN, MAXMIN32); 274 275 /* 276 * We call ddi_taskq_create() with nthread == 1 to ensure in-order 277 * delivery of packets to clients. Note that we need to create the 278 * taskqs before calling netstack_register() since ipnet_stack_init() 279 * registers callbacks that use 'em. 
280 */ 281 ipnet_taskq = ddi_taskq_create(NULL, "ipnet", 1, TASKQ_DEFAULTPRI, 0); 282 ipnet_nicevent_taskq = ddi_taskq_create(NULL, "ipnet_nic_event_queue", 283 1, TASKQ_DEFAULTPRI, 0); 284 if (ipnet_taskq == NULL || ipnet_nicevent_taskq == NULL) { 285 ret = ENOMEM; 286 goto done; 287 } 288 289 netstack_register(NS_IPNET, ipnet_stack_init, NULL, ipnet_stack_fini); 290 netstack_registered = B_TRUE; 291 292 if ((ret = ipnetif_init()) == 0) 293 ret = mod_install(&modlinkage); 294 done: 295 if (ret != 0) { 296 if (ipnet_taskq != NULL) 297 ddi_taskq_destroy(ipnet_taskq); 298 if (ipnet_nicevent_taskq != NULL) 299 ddi_taskq_destroy(ipnet_nicevent_taskq); 300 if (netstack_registered) 301 netstack_unregister(NS_IPNET); 302 id_space_destroy(ipnet_minor_space); 303 } 304 return (ret); 305 } 306 307 int 308 _fini(void) 309 { 310 int err; 311 312 if ((err = mod_remove(&modlinkage)) != 0) 313 return (err); 314 315 netstack_unregister(NS_IPNET); 316 ddi_taskq_destroy(ipnet_nicevent_taskq); 317 ddi_taskq_destroy(ipnet_taskq); 318 id_space_destroy(ipnet_minor_space); 319 return (0); 320 } 321 322 int 323 _info(struct modinfo *modinfop) 324 { 325 return (mod_info(&modlinkage, modinfop)); 326 } 327 328 static void 329 ipnet_register_netihook(ipnet_stack_t *ips) 330 { 331 int ret; 332 zoneid_t zoneid; 333 netid_t netid; 334 335 HOOK_INIT(ips->ips_nicevents, ipnet_nicevent_cb, "ipnet_nicevents", 336 ips); 337 338 /* 339 * It is possible for an exclusive stack to be in the process of 340 * shutting down here, and the netid and protocol lookups could fail 341 * in that case. 
342 */ 343 zoneid = netstackid_to_zoneid(ips->ips_netstack->netstack_stackid); 344 if ((netid = net_zoneidtonetid(zoneid)) == -1) 345 return; 346 347 if ((ips->ips_ndv4 = net_protocol_lookup(netid, NHF_INET)) != NULL) { 348 if ((ret = net_hook_register(ips->ips_ndv4, NH_NIC_EVENTS, 349 ips->ips_nicevents)) != 0) { 350 VERIFY(net_protocol_release(ips->ips_ndv4) == 0); 351 ips->ips_ndv4 = NULL; 352 cmn_err(CE_WARN, "unable to register IPv4 netinfo hooks" 353 " in zone %d: %d", zoneid, ret); 354 } 355 } 356 if ((ips->ips_ndv6 = net_protocol_lookup(netid, NHF_INET6)) != NULL) { 357 if ((ret = net_hook_register(ips->ips_ndv6, NH_NIC_EVENTS, 358 ips->ips_nicevents)) != 0) { 359 VERIFY(net_protocol_release(ips->ips_ndv6) == 0); 360 ips->ips_ndv6 = NULL; 361 cmn_err(CE_WARN, "unable to register IPv6 netinfo hooks" 362 " in zone %d: %d", zoneid, ret); 363 } 364 } 365 366 /* 367 * Create a local set of kstats for each zone. 368 */ 369 ips->ips_kstatp = net_kstat_create(netid, "ipnet", 0, "ipnet_stats", 370 "misc", KSTAT_TYPE_NAMED, 371 sizeof (ipnet_kstats_t) / sizeof (kstat_named_t), 0); 372 if (ips->ips_kstatp != NULL) { 373 bcopy(&stats_template, &ips->ips_stats, 374 sizeof (ips->ips_stats)); 375 ips->ips_kstatp->ks_data = &ips->ips_stats; 376 ips->ips_kstatp->ks_private = 377 (void *)(uintptr_t)ips->ips_netstack->netstack_stackid; 378 kstat_install(ips->ips_kstatp); 379 } else { 380 cmn_err(CE_WARN, "net_kstat_create(%s,%s,%s) failed", 381 "ipnet", "ipnet_stats", "misc"); 382 } 383 } 384 385 /* 386 * This function is called on attach to build an initial view of the 387 * interfaces on the system. It will be called once for IPv4 and once 388 * for IPv6, although there is only one ipnet interface for both IPv4 389 * and IPv6 there are separate address lists. 
390 */ 391 static int 392 ipnet_populate_if(net_handle_t nd, ipnet_stack_t *ips, boolean_t isv6) 393 { 394 phy_if_t phyif; 395 lif_if_t lif; 396 ipnetif_t *ipnetif; 397 char name[LIFNAMSIZ]; 398 boolean_t new_if = B_FALSE; 399 uint64_t ifflags; 400 int ret = 0; 401 402 /* 403 * If ipnet_register_netihook() was unable to initialize this 404 * stack's net_handle_t, then we cannot populate any interface 405 * information. This usually happens when we attempted to 406 * grab a net_handle_t as a stack was shutting down. We don't 407 * want to fail the entire _init() operation because of a 408 * stack shutdown (other stacks will continue to work just 409 * fine), so we silently return success here. 410 */ 411 if (nd == NULL) 412 return (0); 413 414 /* 415 * Make sure we're not processing NIC events during the 416 * population of our interfaces and address lists. 417 */ 418 mutex_enter(&ips->ips_event_lock); 419 420 for (phyif = net_phygetnext(nd, 0); phyif != 0; 421 phyif = net_phygetnext(nd, phyif)) { 422 if (net_getifname(nd, phyif, name, LIFNAMSIZ) != 0) 423 continue; 424 ifflags = 0; 425 (void) net_getlifflags(nd, phyif, 0, &ifflags); 426 if ((ipnetif = ipnetif_getby_index(phyif, ips)) == NULL) { 427 ipnetif = ipnetif_create(name, phyif, ips, ifflags); 428 if (ipnetif == NULL) { 429 ret = ENOMEM; 430 goto done; 431 } 432 new_if = B_TRUE; 433 } 434 ipnetif->if_flags |= 435 isv6 ? IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED; 436 437 for (lif = net_lifgetnext(nd, phyif, 0); lif != 0; 438 lif = net_lifgetnext(nd, phyif, lif)) { 439 /* 440 * Skip addresses that aren't up. We'll add 441 * them when we receive an NE_LIF_UP event. 442 */ 443 if (net_getlifflags(nd, phyif, lif, &ifflags) != 0 || 444 !(ifflags & IFF_UP)) 445 continue; 446 /* Don't add it if we already have it. 
*/ 447 if (ipnet_match_lif(ipnetif, lif, isv6) != NULL) 448 continue; 449 ipnet_add_ifaddr(lif, ipnetif, nd); 450 } 451 if (!new_if) 452 ipnetif_refrele(ipnetif); 453 } 454 455 done: 456 mutex_exit(&ips->ips_event_lock); 457 return (ret); 458 } 459 460 static int 461 ipnet_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 462 { 463 if (cmd != DDI_ATTACH) 464 return (DDI_FAILURE); 465 466 if (ddi_create_minor_node(dip, "lo0", S_IFCHR, IPNET_MINOR_LO, 467 DDI_PSEUDO, 0) == DDI_FAILURE) 468 return (DDI_FAILURE); 469 470 ipnet_dip = dip; 471 return (DDI_SUCCESS); 472 } 473 474 static int 475 ipnet_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 476 { 477 if (cmd != DDI_DETACH) 478 return (DDI_FAILURE); 479 480 ASSERT(dip == ipnet_dip); 481 ddi_remove_minor_node(ipnet_dip, NULL); 482 ipnet_dip = NULL; 483 return (DDI_SUCCESS); 484 } 485 486 /* ARGSUSED */ 487 static int 488 ipnet_devinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 489 { 490 int error = DDI_FAILURE; 491 492 switch (infocmd) { 493 case DDI_INFO_DEVT2INSTANCE: 494 *result = (void *)0; 495 error = DDI_SUCCESS; 496 break; 497 case DDI_INFO_DEVT2DEVINFO: 498 if (ipnet_dip != NULL) { 499 *result = ipnet_dip; 500 error = DDI_SUCCESS; 501 } 502 break; 503 } 504 return (error); 505 } 506 507 /* ARGSUSED */ 508 static int 509 ipnet_open(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp) 510 { 511 ipnet_t *ipnet; 512 netstack_t *ns = NULL; 513 ipnet_stack_t *ips; 514 int err = 0; 515 zoneid_t zoneid = crgetzoneid(crp); 516 517 /* 518 * If the system is labeled, only the global zone is allowed to open 519 * IP observability nodes. 520 */ 521 if (is_system_labeled() && zoneid != GLOBAL_ZONEID) 522 return (EACCES); 523 524 /* We don't support open as a module */ 525 if (sflag & MODOPEN) 526 return (ENOTSUP); 527 528 /* This driver is self-cloning, we don't support re-open. 
*/ 529 if (rq->q_ptr != NULL) 530 return (EBUSY); 531 532 if ((ipnet = kmem_zalloc(sizeof (*ipnet), KM_NOSLEEP)) == NULL) 533 return (ENOMEM); 534 535 VERIFY((ns = netstack_find_by_cred(crp)) != NULL); 536 ips = ns->netstack_ipnet; 537 538 rq->q_ptr = WR(rq)->q_ptr = ipnet; 539 ipnet->ipnet_rq = rq; 540 ipnet->ipnet_minor = (minor_t)id_alloc(ipnet_minor_space); 541 ipnet->ipnet_zoneid = zoneid; 542 ipnet->ipnet_dlstate = DL_UNBOUND; 543 ipnet->ipnet_ns = ns; 544 545 /* 546 * We need to hold ips_event_lock here as any NE_LIF_DOWN events need 547 * to be processed after ipnet_if is set and the ipnet_t has been 548 * inserted in the ips_str_list. 549 */ 550 mutex_enter(&ips->ips_event_lock); 551 if (getminor(*dev) == IPNET_MINOR_LO) { 552 ipnet->ipnet_flags |= IPNET_LOMODE; 553 ipnet->ipnet_acceptfn = ipnet_loaccept; 554 } else { 555 ipnet->ipnet_acceptfn = ipnet_accept; 556 ipnet->ipnet_if = ipnetif_getby_dev(*dev, ips); 557 if (ipnet->ipnet_if == NULL || 558 !ipnetif_in_zone(ipnet->ipnet_if, zoneid, ips)) { 559 err = ENODEV; 560 goto done; 561 } 562 } 563 564 mutex_enter(&ips->ips_walkers_lock); 565 while (ips->ips_walkers_cnt != 0) 566 cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock); 567 list_insert_head(&ips->ips_str_list, ipnet); 568 *dev = makedevice(getmajor(*dev), ipnet->ipnet_minor); 569 qprocson(rq); 570 571 /* 572 * Only register our callback if we're the first open client; we call 573 * unregister in close() for the last open client. 
574 */ 575 if (list_head(&ips->ips_str_list) == list_tail(&ips->ips_str_list)) 576 ips->ips_hook = ipobs_register_hook(ns, ipnet_input); 577 mutex_exit(&ips->ips_walkers_lock); 578 579 done: 580 mutex_exit(&ips->ips_event_lock); 581 if (err != 0) { 582 netstack_rele(ns); 583 id_free(ipnet_minor_space, ipnet->ipnet_minor); 584 if (ipnet->ipnet_if != NULL) 585 ipnetif_refrele(ipnet->ipnet_if); 586 kmem_free(ipnet, sizeof (*ipnet)); 587 } 588 return (err); 589 } 590 591 /* ARGSUSED */ 592 static int 593 ipnet_close(queue_t *rq, int flags __unused, cred_t *credp __unused) 594 { 595 ipnet_t *ipnet = rq->q_ptr; 596 ipnet_stack_t *ips = ipnet->ipnet_ns->netstack_ipnet; 597 598 if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS) 599 ipnet_leave_allmulti(ipnet->ipnet_if, ips); 600 if (ipnet->ipnet_flags & IPNET_PROMISC_MULTI) 601 ipnet_leave_allmulti(ipnet->ipnet_if, ips); 602 603 mutex_enter(&ips->ips_walkers_lock); 604 while (ips->ips_walkers_cnt != 0) 605 cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock); 606 607 qprocsoff(rq); 608 609 list_remove(&ips->ips_str_list, ipnet); 610 if (ipnet->ipnet_if != NULL) 611 ipnetif_refrele(ipnet->ipnet_if); 612 id_free(ipnet_minor_space, ipnet->ipnet_minor); 613 614 if (list_is_empty(&ips->ips_str_list)) { 615 ipobs_unregister_hook(ips->ips_netstack, ips->ips_hook); 616 ips->ips_hook = NULL; 617 } 618 619 kmem_free(ipnet, sizeof (*ipnet)); 620 621 mutex_exit(&ips->ips_walkers_lock); 622 netstack_rele(ips->ips_netstack); 623 return (0); 624 } 625 626 static int 627 ipnet_wput(queue_t *q, mblk_t *mp) 628 { 629 switch (mp->b_datap->db_type) { 630 case M_FLUSH: 631 if (*mp->b_rptr & FLUSHW) { 632 flushq(q, FLUSHDATA); 633 *mp->b_rptr &= ~FLUSHW; 634 } 635 if (*mp->b_rptr & FLUSHR) 636 qreply(q, mp); 637 else 638 freemsg(mp); 639 break; 640 case M_PROTO: 641 case M_PCPROTO: 642 ipnet_wputnondata(q, mp); 643 break; 644 case M_IOCTL: 645 ipnet_ioctl(q, mp); 646 break; 647 case M_IOCDATA: 648 ipnet_iocdata(q, mp); 649 break; 650 default: 651 
freemsg(mp); 652 break; 653 } 654 return (0); 655 } 656 657 static int 658 ipnet_rsrv(queue_t *q) 659 { 660 mblk_t *mp; 661 662 while ((mp = getq(q)) != NULL) { 663 ASSERT(DB_TYPE(mp) == M_DATA); 664 if (canputnext(q)) { 665 putnext(q, mp); 666 } else { 667 (void) putbq(q, mp); 668 break; 669 } 670 } 671 return (0); 672 } 673 674 static void 675 ipnet_ioctl(queue_t *q, mblk_t *mp) 676 { 677 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 678 679 switch (iocp->ioc_cmd) { 680 case DLIOCRAW: 681 miocack(q, mp, 0, 0); 682 break; 683 case DLIOCIPNETINFO: 684 if (iocp->ioc_count == TRANSPARENT) { 685 mcopyin(mp, NULL, sizeof (uint_t), NULL); 686 qreply(q, mp); 687 break; 688 } 689 /* We don't support I_STR with DLIOCIPNETINFO. */ 690 /* FALLTHROUGH */ 691 default: 692 miocnak(q, mp, 0, EINVAL); 693 break; 694 } 695 } 696 697 static void 698 ipnet_iocdata(queue_t *q, mblk_t *mp) 699 { 700 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 701 ipnet_t *ipnet = q->q_ptr; 702 703 switch (iocp->ioc_cmd) { 704 case DLIOCIPNETINFO: 705 if (*(int *)mp->b_cont->b_rptr == 1) 706 ipnet->ipnet_flags |= IPNET_INFO; 707 else if (*(int *)mp->b_cont->b_rptr == 0) 708 ipnet->ipnet_flags &= ~IPNET_INFO; 709 else 710 goto iocnak; 711 miocack(q, mp, 0, DL_IPNETINFO_VERSION); 712 break; 713 default: 714 iocnak: 715 miocnak(q, mp, 0, EINVAL); 716 break; 717 } 718 } 719 720 static void 721 ipnet_wputnondata(queue_t *q, mblk_t *mp) 722 { 723 union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr; 724 t_uscalar_t prim = dlp->dl_primitive; 725 726 switch (prim) { 727 case DL_INFO_REQ: 728 ipnet_inforeq(q, mp); 729 break; 730 case DL_UNBIND_REQ: 731 ipnet_unbindreq(q, mp); 732 break; 733 case DL_BIND_REQ: 734 ipnet_bindreq(q, mp); 735 break; 736 case DL_PROMISCON_REQ: 737 ipnet_dlpromisconreq(q, mp); 738 break; 739 case DL_PROMISCOFF_REQ: 740 ipnet_dlpromiscoffreq(q, mp); 741 break; 742 case DL_UNITDATA_REQ: 743 case DL_DETACH_REQ: 744 case DL_PHYS_ADDR_REQ: 745 case DL_SET_PHYS_ADDR_REQ: 
746 case DL_ENABMULTI_REQ: 747 case DL_DISABMULTI_REQ: 748 case DL_ATTACH_REQ: 749 dlerrorack(q, mp, prim, DL_UNSUPPORTED, 0); 750 break; 751 default: 752 dlerrorack(q, mp, prim, DL_BADPRIM, 0); 753 break; 754 } 755 } 756 757 static void 758 ipnet_inforeq(queue_t *q, mblk_t *mp) 759 { 760 dl_info_ack_t *dlip; 761 size_t size = sizeof (dl_info_ack_t) + sizeof (ushort_t); 762 763 if (MBLKL(mp) < DL_INFO_REQ_SIZE) { 764 dlerrorack(q, mp, DL_INFO_REQ, DL_BADPRIM, 0); 765 return; 766 } 767 768 if ((mp = mexchange(q, mp, size, M_PCPROTO, DL_INFO_ACK)) == NULL) 769 return; 770 771 dlip = (dl_info_ack_t *)mp->b_rptr; 772 *dlip = ipnet_infoack; 773 qreply(q, mp); 774 } 775 776 static void 777 ipnet_bindreq(queue_t *q, mblk_t *mp) 778 { 779 union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr; 780 ipnet_t *ipnet = q->q_ptr; 781 782 if (MBLKL(mp) < DL_BIND_REQ_SIZE) { 783 dlerrorack(q, mp, DL_BIND_REQ, DL_BADPRIM, 0); 784 return; 785 } 786 787 switch (dlp->bind_req.dl_sap) { 788 case 0 : 789 ipnet->ipnet_family = AF_UNSPEC; 790 break; 791 case IPV4_VERSION : 792 ipnet->ipnet_family = AF_INET; 793 break; 794 case IPV6_VERSION : 795 ipnet->ipnet_family = AF_INET6; 796 break; 797 default : 798 dlerrorack(q, mp, DL_BIND_REQ, DL_BADSAP, 0); 799 return; 800 /*NOTREACHED*/ 801 } 802 803 ipnet->ipnet_dlstate = DL_IDLE; 804 dlbindack(q, mp, dlp->bind_req.dl_sap, 0, 0, 0, 0); 805 } 806 807 static void 808 ipnet_unbindreq(queue_t *q, mblk_t *mp) 809 { 810 ipnet_t *ipnet = q->q_ptr; 811 812 if (MBLKL(mp) < DL_UNBIND_REQ_SIZE) { 813 dlerrorack(q, mp, DL_UNBIND_REQ, DL_BADPRIM, 0); 814 return; 815 } 816 817 if (ipnet->ipnet_dlstate != DL_IDLE) { 818 dlerrorack(q, mp, DL_UNBIND_REQ, DL_OUTSTATE, 0); 819 } else { 820 ipnet->ipnet_dlstate = DL_UNBOUND; 821 ipnet->ipnet_family = AF_UNSPEC; 822 dlokack(q, mp, DL_UNBIND_REQ); 823 } 824 } 825 826 static void 827 ipnet_dlpromisconreq(queue_t *q, mblk_t *mp) 828 { 829 ipnet_t *ipnet = q->q_ptr; 830 t_uscalar_t level; 831 int err; 832 833 if 
(MBLKL(mp) < DL_PROMISCON_REQ_SIZE) { 834 dlerrorack(q, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0); 835 return; 836 } 837 838 if (ipnet->ipnet_flags & IPNET_LOMODE) { 839 dlokack(q, mp, DL_PROMISCON_REQ); 840 return; 841 } 842 843 level = ((dl_promiscon_req_t *)mp->b_rptr)->dl_level; 844 if (level == DL_PROMISC_PHYS || level == DL_PROMISC_MULTI) { 845 if ((err = ipnet_join_allmulti(ipnet->ipnet_if, 846 ipnet->ipnet_ns->netstack_ipnet)) != 0) { 847 dlerrorack(q, mp, DL_PROMISCON_REQ, DL_SYSERR, err); 848 return; 849 } 850 } 851 852 switch (level) { 853 case DL_PROMISC_PHYS: 854 ipnet->ipnet_flags |= IPNET_PROMISC_PHYS; 855 break; 856 case DL_PROMISC_SAP: 857 ipnet->ipnet_flags |= IPNET_PROMISC_SAP; 858 break; 859 case DL_PROMISC_MULTI: 860 ipnet->ipnet_flags |= IPNET_PROMISC_MULTI; 861 break; 862 default: 863 dlerrorack(q, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0); 864 return; 865 } 866 867 dlokack(q, mp, DL_PROMISCON_REQ); 868 } 869 870 static void 871 ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp) 872 { 873 ipnet_t *ipnet = q->q_ptr; 874 t_uscalar_t level; 875 uint16_t orig_ipnet_flags = ipnet->ipnet_flags; 876 877 if (MBLKL(mp) < DL_PROMISCOFF_REQ_SIZE) { 878 dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0); 879 return; 880 } 881 882 if (ipnet->ipnet_flags & IPNET_LOMODE) { 883 dlokack(q, mp, DL_PROMISCOFF_REQ); 884 return; 885 } 886 887 level = ((dl_promiscon_req_t *)mp->b_rptr)->dl_level; 888 switch (level) { 889 case DL_PROMISC_PHYS: 890 if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS) 891 ipnet->ipnet_flags &= ~IPNET_PROMISC_PHYS; 892 break; 893 case DL_PROMISC_SAP: 894 if (ipnet->ipnet_flags & IPNET_PROMISC_SAP) 895 ipnet->ipnet_flags &= ~IPNET_PROMISC_SAP; 896 break; 897 case DL_PROMISC_MULTI: 898 if (ipnet->ipnet_flags & IPNET_PROMISC_MULTI) 899 ipnet->ipnet_flags &= ~IPNET_PROMISC_MULTI; 900 break; 901 default: 902 dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0); 903 return; 904 } 905 906 if (orig_ipnet_flags == ipnet->ipnet_flags) { 907 dlerrorack(q, mp, 
DL_PROMISCOFF_REQ, DL_NOTENAB, 0); 908 return; 909 } 910 911 if (level == DL_PROMISC_PHYS || level == DL_PROMISC_MULTI) { 912 ipnet_leave_allmulti(ipnet->ipnet_if, 913 ipnet->ipnet_ns->netstack_ipnet); 914 } 915 916 dlokack(q, mp, DL_PROMISCOFF_REQ); 917 } 918 919 static int 920 ipnet_join_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips) 921 { 922 int err = 0; 923 ip_stack_t *ipst = ips->ips_netstack->netstack_ip; 924 uint64_t index = ipnetif->if_index; 925 926 mutex_enter(&ips->ips_event_lock); 927 if (ipnetif->if_multicnt == 0) { 928 ASSERT((ipnetif->if_flags & 929 (IPNETIF_IPV4ALLMULTI | IPNETIF_IPV6ALLMULTI)) == 0); 930 if (ipnetif->if_flags & IPNETIF_IPV4PLUMBED) { 931 err = ip_join_allmulti(index, B_FALSE, ipst); 932 if (err != 0) 933 goto done; 934 ipnetif->if_flags |= IPNETIF_IPV4ALLMULTI; 935 } 936 if (ipnetif->if_flags & IPNETIF_IPV6PLUMBED) { 937 err = ip_join_allmulti(index, B_TRUE, ipst); 938 if (err != 0 && 939 (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI)) { 940 (void) ip_leave_allmulti(index, B_FALSE, ipst); 941 ipnetif->if_flags &= ~IPNETIF_IPV4ALLMULTI; 942 goto done; 943 } 944 ipnetif->if_flags |= IPNETIF_IPV6ALLMULTI; 945 } 946 } 947 ipnetif->if_multicnt++; 948 949 done: 950 mutex_exit(&ips->ips_event_lock); 951 return (err); 952 } 953 954 static void 955 ipnet_leave_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips) 956 { 957 int err; 958 ip_stack_t *ipst = ips->ips_netstack->netstack_ip; 959 uint64_t index = ipnetif->if_index; 960 961 mutex_enter(&ips->ips_event_lock); 962 ASSERT(ipnetif->if_multicnt != 0); 963 if (--ipnetif->if_multicnt == 0) { 964 if (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI) { 965 err = ip_leave_allmulti(index, B_FALSE, ipst); 966 ASSERT(err == 0 || err == ENODEV); 967 ipnetif->if_flags &= ~IPNETIF_IPV4ALLMULTI; 968 } 969 if (ipnetif->if_flags & IPNETIF_IPV6ALLMULTI) { 970 err = ip_leave_allmulti(index, B_TRUE, ipst); 971 ASSERT(err == 0 || err == ENODEV); 972 ipnetif->if_flags &= ~IPNETIF_IPV6ALLMULTI; 973 } 974 } 975 
mutex_exit(&ips->ips_event_lock); 976 } 977 978 /* 979 * Allocate a new mblk_t and put a dl_ipnetinfo_t in it. 980 * The structure it copies the header information from, 981 * hook_pkt_observe_t, is constructed using network byte 982 * order in ipobs_hook(), so there is no conversion here. 983 */ 984 static mblk_t * 985 ipnet_addheader(hook_pkt_observe_t *hdr, mblk_t *mp) 986 { 987 mblk_t *dlhdr; 988 dl_ipnetinfo_t *dl; 989 990 if ((dlhdr = allocb(sizeof (dl_ipnetinfo_t), BPRI_HI)) == NULL) { 991 freemsg(mp); 992 return (NULL); 993 } 994 dl = (dl_ipnetinfo_t *)dlhdr->b_rptr; 995 dl->dli_version = DL_IPNETINFO_VERSION; 996 dl->dli_family = hdr->hpo_family; 997 dl->dli_htype = hdr->hpo_htype; 998 dl->dli_pktlen = hdr->hpo_pktlen; 999 dl->dli_ifindex = hdr->hpo_ifindex; 1000 dl->dli_grifindex = hdr->hpo_grifindex; 1001 dl->dli_zsrc = hdr->hpo_zsrc; 1002 dl->dli_zdst = hdr->hpo_zdst; 1003 dlhdr->b_wptr += sizeof (*dl); 1004 dlhdr->b_cont = mp; 1005 1006 return (dlhdr); 1007 } 1008 1009 static ipnet_addrtype_t 1010 ipnet_get_addrtype(ipnet_t *ipnet, ipnet_addrp_t *addr) 1011 { 1012 list_t *list; 1013 ipnetif_t *ipnetif = ipnet->ipnet_if; 1014 ipnetif_addr_t *ifaddr; 1015 ipnet_addrtype_t addrtype = IPNETADDR_UNKNOWN; 1016 1017 /* First check if the address is multicast or limited broadcast. */ 1018 switch (addr->iap_family) { 1019 case AF_INET: 1020 if (CLASSD(*(addr->iap_addr4)) || 1021 *(addr->iap_addr4) == INADDR_BROADCAST) 1022 return (IPNETADDR_MBCAST); 1023 break; 1024 case AF_INET6: 1025 if (IN6_IS_ADDR_MULTICAST(addr->iap_addr6)) 1026 return (IPNETADDR_MBCAST); 1027 break; 1028 } 1029 1030 /* 1031 * Walk the address list to see if the address belongs to our 1032 * interface or is one of our subnet broadcast addresses. 1033 */ 1034 mutex_enter(&ipnetif->if_addr_lock); 1035 list = (addr->iap_family == AF_INET) ? 
1036 &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list; 1037 for (ifaddr = list_head(list); 1038 ifaddr != NULL && addrtype == IPNETADDR_UNKNOWN; 1039 ifaddr = list_next(list, ifaddr)) { 1040 /* 1041 * If we're not in the global zone, then only look at 1042 * addresses in our zone. 1043 */ 1044 if (ipnet->ipnet_zoneid != GLOBAL_ZONEID && 1045 ipnet->ipnet_zoneid != ifaddr->ifa_zone) 1046 continue; 1047 switch (addr->iap_family) { 1048 case AF_INET: 1049 if (ifaddr->ifa_ip4addr != INADDR_ANY && 1050 *(addr->iap_addr4) == ifaddr->ifa_ip4addr) 1051 addrtype = IPNETADDR_MYADDR; 1052 else if (ifaddr->ifa_brdaddr != INADDR_ANY && 1053 *(addr->iap_addr4) == ifaddr->ifa_brdaddr) 1054 addrtype = IPNETADDR_MBCAST; 1055 break; 1056 case AF_INET6: 1057 if (IN6_ARE_ADDR_EQUAL(addr->iap_addr6, 1058 &ifaddr->ifa_ip6addr)) 1059 addrtype = IPNETADDR_MYADDR; 1060 break; 1061 } 1062 } 1063 mutex_exit(&ipnetif->if_addr_lock); 1064 1065 return (addrtype); 1066 } 1067 1068 /* 1069 * Verify if the packet contained in hdr should be passed up to the 1070 * ipnet client stream. 1071 */ 1072 static boolean_t 1073 ipnet_accept(ipnet_t *ipnet, hook_pkt_observe_t *hdr, ipnet_addrp_t *src, 1074 ipnet_addrp_t *dst) 1075 { 1076 boolean_t obsif; 1077 uint64_t ifindex = ipnet->ipnet_if->if_index; 1078 ipnet_addrtype_t srctype; 1079 ipnet_addrtype_t dsttype; 1080 1081 srctype = ipnet_get_addrtype(ipnet, src); 1082 dsttype = ipnet_get_addrtype(ipnet, dst); 1083 1084 /* 1085 * If the packet's ifindex matches ours, or the packet's group ifindex 1086 * matches ours, it's on the interface we're observing. (Thus, 1087 * observing on the group ifindex matches all ifindexes in the group.) 
1088 */ 1089 obsif = (ntohl(hdr->hpo_ifindex) == ifindex || 1090 ntohl(hdr->hpo_grifindex) == ifindex); 1091 1092 DTRACE_PROBE5(ipnet_accept__addr, 1093 ipnet_addrtype_t, srctype, ipnet_addrp_t *, src, 1094 ipnet_addrtype_t, dsttype, ipnet_addrp_t *, dst, 1095 boolean_t, obsif); 1096 1097 /* 1098 * Do not allow an ipnet stream to see packets that are not from or to 1099 * its zone. The exception is when zones are using the shared stack 1100 * model. In this case, streams in the global zone have visibility 1101 * into other shared-stack zones, and broadcast and multicast traffic 1102 * is visible by all zones in the stack. 1103 */ 1104 if (ipnet->ipnet_zoneid != GLOBAL_ZONEID && 1105 dsttype != IPNETADDR_MBCAST) { 1106 if (ipnet->ipnet_zoneid != ntohl(hdr->hpo_zsrc) && 1107 ipnet->ipnet_zoneid != ntohl(hdr->hpo_zdst)) 1108 return (B_FALSE); 1109 } 1110 1111 /* 1112 * If DL_PROMISC_SAP isn't enabled, then the bound SAP must match the 1113 * packet's IP version. 1114 */ 1115 if (!(ipnet->ipnet_flags & IPNET_PROMISC_SAP) && 1116 ipnet->ipnet_family != hdr->hpo_family) 1117 return (B_FALSE); 1118 1119 /* If the destination address is ours, then accept the packet. */ 1120 if (dsttype == IPNETADDR_MYADDR) 1121 return (B_TRUE); 1122 1123 /* 1124 * If DL_PROMISC_PHYS is enabled, then we can see all packets that are 1125 * sent or received on the interface we're observing, or packets that 1126 * have our source address (this allows us to see packets we send). 1127 */ 1128 if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS) { 1129 if (srctype == IPNETADDR_MYADDR || obsif) 1130 return (B_TRUE); 1131 } 1132 1133 /* 1134 * We accept multicast and broadcast packets transmitted or received 1135 * on the interface we're observing. 1136 */ 1137 if (dsttype == IPNETADDR_MBCAST && obsif) 1138 return (B_TRUE); 1139 1140 return (B_FALSE); 1141 } 1142 1143 /* 1144 * Verify if the packet contained in hdr should be passed up to the ipnet 1145 * client stream that's in IPNET_LOMODE. 
1146 */ 1147 /* ARGSUSED */ 1148 static boolean_t 1149 ipnet_loaccept(ipnet_t *ipnet, hook_pkt_observe_t *hdr, ipnet_addrp_t *src, 1150 ipnet_addrp_t *dst) 1151 { 1152 if (hdr->hpo_htype != htons(IPOBS_HOOK_LOCAL)) { 1153 /* 1154 * ipnet_if is only NULL for IPNET_MINOR_LO devices. 1155 */ 1156 if (ipnet->ipnet_if == NULL) 1157 return (B_FALSE); 1158 } 1159 1160 /* 1161 * An ipnet stream must not see packets that are not from/to its zone. 1162 */ 1163 if (ipnet->ipnet_zoneid != GLOBAL_ZONEID) { 1164 if (ipnet->ipnet_zoneid != ntohl(hdr->hpo_zsrc) && 1165 ipnet->ipnet_zoneid != ntohl(hdr->hpo_zdst)) 1166 return (B_FALSE); 1167 } 1168 1169 return (ipnet->ipnet_family == AF_UNSPEC || 1170 ipnet->ipnet_family == hdr->hpo_family); 1171 } 1172 1173 static void 1174 ipnet_dispatch(void *arg) 1175 { 1176 mblk_t *mp = arg; 1177 hook_pkt_observe_t *hdr = (hook_pkt_observe_t *)mp->b_rptr; 1178 ipnet_t *ipnet; 1179 mblk_t *netmp; 1180 list_t *list; 1181 ipnet_stack_t *ips; 1182 ipnet_addrp_t src; 1183 ipnet_addrp_t dst; 1184 1185 ips = ((netstack_t *)hdr->hpo_ctx)->netstack_ipnet; 1186 1187 netmp = hdr->hpo_pkt->b_cont; 1188 src.iap_family = hdr->hpo_family; 1189 dst.iap_family = hdr->hpo_family; 1190 1191 if (hdr->hpo_family == AF_INET) { 1192 src.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_src; 1193 dst.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_dst; 1194 } else { 1195 src.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_src; 1196 dst.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_dst; 1197 } 1198 1199 ipnet_walkers_inc(ips); 1200 1201 list = &ips->ips_str_list; 1202 for (ipnet = list_head(list); ipnet != NULL; 1203 ipnet = list_next(list, ipnet)) { 1204 if (!(*ipnet->ipnet_acceptfn)(ipnet, hdr, &src, &dst)) { 1205 IPSK_BUMP(ips, ik_acceptFail); 1206 continue; 1207 } 1208 IPSK_BUMP(ips, ik_acceptOk); 1209 1210 if (list_next(list, ipnet) == NULL) { 1211 netmp = hdr->hpo_pkt->b_cont; 1212 hdr->hpo_pkt->b_cont = NULL; 1213 } else { 1214 if ((netmp = 
dupmsg(hdr->hpo_pkt->b_cont)) == NULL && 1215 (netmp = copymsg(hdr->hpo_pkt->b_cont)) == NULL) { 1216 IPSK_BUMP(ips, ik_duplicationFail); 1217 continue; 1218 } 1219 } 1220 1221 if (ipnet->ipnet_flags & IPNET_INFO) { 1222 if ((netmp = ipnet_addheader(hdr, netmp)) == NULL) { 1223 IPSK_BUMP(ips, ik_dispatchHeaderDrop); 1224 continue; 1225 } 1226 } 1227 1228 if (ipnet->ipnet_rq->q_first == NULL && 1229 canputnext(ipnet->ipnet_rq)) { 1230 putnext(ipnet->ipnet_rq, netmp); 1231 IPSK_BUMP(ips, ik_dispatchDeliver); 1232 } else if (canput(ipnet->ipnet_rq)) { 1233 (void) putq(ipnet->ipnet_rq, netmp); 1234 IPSK_BUMP(ips, ik_dispatchDeliver); 1235 } else { 1236 freemsg(netmp); 1237 IPSK_BUMP(ips, ik_dispatchPutDrop); 1238 } 1239 } 1240 1241 ipnet_walkers_dec(ips); 1242 1243 freemsg(mp); 1244 } 1245 1246 static void 1247 ipnet_input(mblk_t *mp) 1248 { 1249 hook_pkt_observe_t *hdr = (hook_pkt_observe_t *)mp->b_rptr; 1250 ipnet_stack_t *ips; 1251 1252 ips = ((netstack_t *)hdr->hpo_ctx)->netstack_ipnet; 1253 1254 if (ddi_taskq_dispatch(ipnet_taskq, ipnet_dispatch, mp, DDI_NOSLEEP) != 1255 DDI_SUCCESS) { 1256 IPSK_BUMP(ips, ik_dispatchFail); 1257 freemsg(mp); 1258 } else { 1259 IPSK_BUMP(ips, ik_dispatchOk); 1260 } 1261 } 1262 1263 static ipnetif_t * 1264 ipnet_alloc_if(ipnet_stack_t *ips) 1265 { 1266 ipnetif_t *ipnetif; 1267 1268 if ((ipnetif = kmem_zalloc(sizeof (*ipnetif), KM_NOSLEEP)) == NULL) 1269 return (NULL); 1270 1271 mutex_init(&ipnetif->if_addr_lock, NULL, MUTEX_DEFAULT, 0); 1272 list_create(&ipnetif->if_ip4addr_list, sizeof (ipnetif_addr_t), 1273 offsetof(ipnetif_addr_t, ifa_link)); 1274 list_create(&ipnetif->if_ip6addr_list, sizeof (ipnetif_addr_t), 1275 offsetof(ipnetif_addr_t, ifa_link)); 1276 mutex_init(&ipnetif->if_reflock, NULL, MUTEX_DEFAULT, 0); 1277 1278 ipnetif->if_stackp = ips; 1279 1280 return (ipnetif); 1281 } 1282 1283 /* 1284 * Create a new ipnetif_t and new minor node for it. 
If creation is 1285 * successful the new ipnetif_t is inserted into an avl_tree 1286 * containing ipnetif's for this stack instance. 1287 */ 1288 static ipnetif_t * 1289 ipnetif_create(const char *name, uint64_t index, ipnet_stack_t *ips, 1290 uint64_t ifflags) 1291 { 1292 ipnetif_t *ipnetif; 1293 avl_index_t where = 0; 1294 minor_t ifminor; 1295 1296 /* 1297 * Because ipnetif_create() can be called from a NIC event 1298 * callback, it should not block. 1299 */ 1300 ifminor = (minor_t)id_alloc_nosleep(ipnet_minor_space); 1301 if (ifminor == (minor_t)-1) 1302 return (NULL); 1303 if ((ipnetif = ipnet_alloc_if(ips)) == NULL) { 1304 id_free(ipnet_minor_space, ifminor); 1305 return (NULL); 1306 } 1307 1308 (void) strlcpy(ipnetif->if_name, name, LIFNAMSIZ); 1309 ipnetif->if_index = (uint_t)index; 1310 ipnetif->if_zoneid = netstack_get_zoneid(ips->ips_netstack); 1311 ipnetif->if_dev = makedevice(ipnet_major, ifminor); 1312 1313 ipnetif->if_refcnt = 1; 1314 if ((ifflags & IFF_LOOPBACK) != 0) 1315 ipnetif->if_flags = IPNETIF_LOOPBACK; 1316 1317 mutex_enter(&ips->ips_avl_lock); 1318 VERIFY(avl_find(&ips->ips_avl_by_index, &index, &where) == NULL); 1319 avl_insert(&ips->ips_avl_by_index, ipnetif, where); 1320 VERIFY(avl_find(&ips->ips_avl_by_name, (void *)name, &where) == NULL); 1321 avl_insert(&ips->ips_avl_by_name, ipnetif, where); 1322 mutex_exit(&ips->ips_avl_lock); 1323 1324 return (ipnetif); 1325 } 1326 1327 static void 1328 ipnetif_remove(ipnetif_t *ipnetif, ipnet_stack_t *ips) 1329 { 1330 ipnet_t *ipnet; 1331 1332 ipnet_walkers_inc(ips); 1333 /* Send a SIGHUP to all open streams associated with this ipnetif. 
*/ 1334 for (ipnet = list_head(&ips->ips_str_list); ipnet != NULL; 1335 ipnet = list_next(&ips->ips_str_list, ipnet)) { 1336 if (ipnet->ipnet_if == ipnetif) 1337 (void) putnextctl(ipnet->ipnet_rq, M_HANGUP); 1338 } 1339 ipnet_walkers_dec(ips); 1340 mutex_enter(&ips->ips_avl_lock); 1341 avl_remove(&ips->ips_avl_by_index, ipnetif); 1342 avl_remove(&ips->ips_avl_by_name, ipnetif); 1343 mutex_exit(&ips->ips_avl_lock); 1344 /* 1345 * Release the reference we implicitly held in ipnetif_create(). 1346 */ 1347 ipnetif_refrele(ipnetif); 1348 } 1349 1350 static void 1351 ipnet_purge_addrlist(list_t *addrlist) 1352 { 1353 ipnetif_addr_t *ifa; 1354 1355 while ((ifa = list_head(addrlist)) != NULL) { 1356 list_remove(addrlist, ifa); 1357 if (ifa->ifa_shared != NULL) 1358 ipnetif_clone_release(ifa->ifa_shared); 1359 kmem_free(ifa, sizeof (*ifa)); 1360 } 1361 } 1362 1363 static void 1364 ipnetif_free(ipnetif_t *ipnetif) 1365 { 1366 ASSERT(ipnetif->if_refcnt == 0); 1367 ASSERT(ipnetif->if_sharecnt == 0); 1368 1369 /* Remove IPv4/v6 address lists from the ipnetif */ 1370 ipnet_purge_addrlist(&ipnetif->if_ip4addr_list); 1371 list_destroy(&ipnetif->if_ip4addr_list); 1372 ipnet_purge_addrlist(&ipnetif->if_ip6addr_list); 1373 list_destroy(&ipnetif->if_ip6addr_list); 1374 mutex_destroy(&ipnetif->if_addr_lock); 1375 mutex_destroy(&ipnetif->if_reflock); 1376 if (ipnetif->if_dev != 0) 1377 id_free(ipnet_minor_space, getminor(ipnetif->if_dev)); 1378 kmem_free(ipnetif, sizeof (*ipnetif)); 1379 } 1380 1381 /* 1382 * Create an ipnetif_addr_t with the given logical interface id (lif) 1383 * and add it to the supplied ipnetif. The lif is the netinfo 1384 * representation of logical interface id, and we use this id to match 1385 * incoming netinfo events against our lists of addresses. 
1386 */ 1387 static void 1388 ipnet_add_ifaddr(uint64_t lif, ipnetif_t *ipnetif, net_handle_t nd) 1389 { 1390 ipnetif_addr_t *ifaddr; 1391 zoneid_t zoneid; 1392 struct sockaddr_in bcast; 1393 struct sockaddr_storage addr; 1394 net_ifaddr_t type = NA_ADDRESS; 1395 uint64_t phyif = ipnetif->if_index; 1396 1397 if (net_getlifaddr(nd, phyif, lif, 1, &type, &addr) != 0 || 1398 net_getlifzone(nd, phyif, lif, &zoneid) != 0) 1399 return; 1400 1401 if ((ifaddr = kmem_alloc(sizeof (*ifaddr), KM_NOSLEEP)) == NULL) 1402 return; 1403 ifaddr->ifa_zone = zoneid; 1404 ifaddr->ifa_id = lif; 1405 ifaddr->ifa_shared = NULL; 1406 1407 switch (addr.ss_family) { 1408 case AF_INET: 1409 ifaddr->ifa_ip4addr = 1410 ((struct sockaddr_in *)&addr)->sin_addr.s_addr; 1411 /* 1412 * Try and get the broadcast address. Note that it's okay for 1413 * an interface to not have a broadcast address, so we don't 1414 * fail the entire operation if net_getlifaddr() fails here. 1415 */ 1416 type = NA_BROADCAST; 1417 if (net_getlifaddr(nd, phyif, lif, 1, &type, &bcast) == 0) 1418 ifaddr->ifa_brdaddr = bcast.sin_addr.s_addr; 1419 break; 1420 case AF_INET6: 1421 ifaddr->ifa_ip6addr = ((struct sockaddr_in6 *)&addr)->sin6_addr; 1422 break; 1423 } 1424 1425 /* 1426 * The zoneid stored in ipnetif_t needs to correspond to the actual 1427 * zone the address is being used in. This facilitates finding the 1428 * correct netstack_t pointer, amongst other things, later. 1429 */ 1430 if (zoneid == ALL_ZONES) 1431 zoneid = GLOBAL_ZONEID; 1432 1433 mutex_enter(&ipnetif->if_addr_lock); 1434 if (zoneid != ipnetif->if_zoneid) { 1435 ipnetif_t *ifp2; 1436 1437 ifp2 = ipnetif_clone_create(ipnetif, zoneid); 1438 ifaddr->ifa_shared = ifp2; 1439 } 1440 list_insert_tail(addr.ss_family == AF_INET ? 
1441 &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list, ifaddr); 1442 mutex_exit(&ipnetif->if_addr_lock); 1443 } 1444 1445 static void 1446 ipnet_delete_ifaddr(ipnetif_addr_t *ifaddr, ipnetif_t *ipnetif, boolean_t isv6) 1447 { 1448 mutex_enter(&ipnetif->if_addr_lock); 1449 if (ifaddr->ifa_shared != NULL) 1450 ipnetif_clone_release(ifaddr->ifa_shared); 1451 1452 list_remove(isv6 ? 1453 &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list, ifaddr); 1454 mutex_exit(&ipnetif->if_addr_lock); 1455 kmem_free(ifaddr, sizeof (*ifaddr)); 1456 } 1457 1458 static void 1459 ipnet_plumb_ev(ipnet_nicevent_t *ipne, ipnet_stack_t *ips, boolean_t isv6) 1460 { 1461 ipnetif_t *ipnetif; 1462 boolean_t refrele_needed = B_TRUE; 1463 uint64_t ifflags; 1464 uint64_t ifindex; 1465 char *ifname; 1466 1467 ifflags = 0; 1468 ifname = ipne->ipne_ifname; 1469 ifindex = ipne->ipne_ifindex; 1470 1471 (void) net_getlifflags(ipne->ipne_protocol, ifindex, 0, &ifflags); 1472 1473 if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL) { 1474 ipnetif = ipnetif_create(ifname, ifindex, ips, ifflags); 1475 refrele_needed = B_FALSE; 1476 } 1477 if (ipnetif != NULL) { 1478 ipnetif->if_flags |= 1479 isv6 ? IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED; 1480 } 1481 1482 if (ipnetif->if_multicnt != 0) { 1483 if (ip_join_allmulti(ifindex, isv6, 1484 ips->ips_netstack->netstack_ip) == 0) { 1485 ipnetif->if_flags |= 1486 isv6 ? IPNETIF_IPV6ALLMULTI : IPNETIF_IPV4ALLMULTI; 1487 } 1488 } 1489 1490 if (refrele_needed) 1491 ipnetif_refrele(ipnetif); 1492 } 1493 1494 static void 1495 ipnet_unplumb_ev(uint64_t ifindex, ipnet_stack_t *ips, boolean_t isv6) 1496 { 1497 ipnetif_t *ipnetif; 1498 1499 if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL) 1500 return; 1501 1502 mutex_enter(&ipnetif->if_addr_lock); 1503 ipnet_purge_addrlist(isv6 ? 
1504 &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list); 1505 mutex_exit(&ipnetif->if_addr_lock); 1506 1507 /* 1508 * Note that we have one ipnetif for both IPv4 and IPv6, but we receive 1509 * separate NE_UNPLUMB events for IPv4 and IPv6. We remove the ipnetif 1510 * if both IPv4 and IPv6 interfaces have been unplumbed. 1511 */ 1512 ipnetif->if_flags &= isv6 ? ~IPNETIF_IPV6PLUMBED : ~IPNETIF_IPV4PLUMBED; 1513 if (!(ipnetif->if_flags & (IPNETIF_IPV4PLUMBED | IPNETIF_IPV6PLUMBED))) 1514 ipnetif_remove(ipnetif, ips); 1515 ipnetif_refrele(ipnetif); 1516 } 1517 1518 static void 1519 ipnet_lifup_ev(uint64_t ifindex, uint64_t lifindex, net_handle_t nd, 1520 ipnet_stack_t *ips, boolean_t isv6) 1521 { 1522 ipnetif_t *ipnetif; 1523 ipnetif_addr_t *ifaddr; 1524 1525 if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL) 1526 return; 1527 if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL) { 1528 /* 1529 * We must have missed a NE_LIF_DOWN event. Delete this 1530 * ifaddr and re-create it. 1531 */ 1532 ipnet_delete_ifaddr(ifaddr, ipnetif, isv6); 1533 } 1534 1535 ipnet_add_ifaddr(lifindex, ipnetif, nd); 1536 ipnetif_refrele(ipnetif); 1537 } 1538 1539 static void 1540 ipnet_lifdown_ev(uint64_t ifindex, uint64_t lifindex, ipnet_stack_t *ips, 1541 boolean_t isv6) 1542 { 1543 ipnetif_t *ipnetif; 1544 ipnetif_addr_t *ifaddr; 1545 1546 if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL) 1547 return; 1548 if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL) 1549 ipnet_delete_ifaddr(ifaddr, ipnetif, isv6); 1550 ipnetif_refrele(ipnetif); 1551 /* 1552 * Make sure that open streams on this ipnetif are still allowed to 1553 * have it open. 1554 */ 1555 ipnetif_zonecheck(ipnetif, ips); 1556 } 1557 1558 /* 1559 * This callback from the NIC event framework dispatches a taskq as the event 1560 * handlers may block. 
1561 */ 1562 /* ARGSUSED */ 1563 static int 1564 ipnet_nicevent_cb(hook_event_token_t token, hook_data_t info, void *arg) 1565 { 1566 ipnet_stack_t *ips = arg; 1567 hook_nic_event_t *hn = (hook_nic_event_t *)info; 1568 ipnet_nicevent_t *ipne; 1569 1570 if ((ipne = kmem_alloc(sizeof (ipnet_nicevent_t), KM_NOSLEEP)) == NULL) 1571 return (0); 1572 ipne->ipne_event = hn->hne_event; 1573 ipne->ipne_protocol = hn->hne_protocol; 1574 ipne->ipne_stackid = ips->ips_netstack->netstack_stackid; 1575 ipne->ipne_ifindex = hn->hne_nic; 1576 ipne->ipne_lifindex = hn->hne_lif; 1577 if (hn->hne_datalen != 0) { 1578 (void) strlcpy(ipne->ipne_ifname, hn->hne_data, 1579 sizeof (ipne->ipne_ifname)); 1580 } 1581 (void) ddi_taskq_dispatch(ipnet_nicevent_taskq, ipnet_nicevent_task, 1582 ipne, DDI_NOSLEEP); 1583 return (0); 1584 } 1585 1586 static void 1587 ipnet_nicevent_task(void *arg) 1588 { 1589 ipnet_nicevent_t *ipne = arg; 1590 netstack_t *ns; 1591 ipnet_stack_t *ips; 1592 boolean_t isv6; 1593 1594 if ((ns = netstack_find_by_stackid(ipne->ipne_stackid)) == NULL) 1595 goto done; 1596 ips = ns->netstack_ipnet; 1597 isv6 = (ipne->ipne_protocol == ips->ips_ndv6); 1598 1599 mutex_enter(&ips->ips_event_lock); 1600 switch (ipne->ipne_event) { 1601 case NE_PLUMB: 1602 ipnet_plumb_ev(ipne, ips, isv6); 1603 break; 1604 case NE_UNPLUMB: 1605 ipnet_unplumb_ev(ipne->ipne_ifindex, ips, isv6); 1606 break; 1607 case NE_LIF_UP: 1608 ipnet_lifup_ev(ipne->ipne_ifindex, ipne->ipne_lifindex, 1609 ipne->ipne_protocol, ips, isv6); 1610 break; 1611 case NE_LIF_DOWN: 1612 ipnet_lifdown_ev(ipne->ipne_ifindex, ipne->ipne_lifindex, ips, 1613 isv6); 1614 break; 1615 default: 1616 break; 1617 } 1618 mutex_exit(&ips->ips_event_lock); 1619 done: 1620 if (ns != NULL) 1621 netstack_rele(ns); 1622 kmem_free(ipne, sizeof (ipnet_nicevent_t)); 1623 } 1624 1625 dev_t 1626 ipnet_if_getdev(char *name, zoneid_t zoneid) 1627 { 1628 netstack_t *ns; 1629 ipnet_stack_t *ips; 1630 ipnetif_t *ipnetif; 1631 dev_t dev = (dev_t)-1; 
1632 1633 if (is_system_labeled() && zoneid != GLOBAL_ZONEID) 1634 return (dev); 1635 if ((ns = netstack_find_by_zoneid(zoneid)) == NULL) 1636 return (dev); 1637 1638 ips = ns->netstack_ipnet; 1639 mutex_enter(&ips->ips_avl_lock); 1640 if ((ipnetif = avl_find(&ips->ips_avl_by_name, name, NULL)) != NULL) { 1641 if (ipnetif_in_zone(ipnetif, zoneid, ips)) 1642 dev = ipnetif->if_dev; 1643 } 1644 mutex_exit(&ips->ips_avl_lock); 1645 netstack_rele(ns); 1646 1647 return (dev); 1648 } 1649 1650 static ipnetif_t * 1651 ipnetif_getby_index(uint64_t id, ipnet_stack_t *ips) 1652 { 1653 ipnetif_t *ipnetif; 1654 1655 mutex_enter(&ips->ips_avl_lock); 1656 if ((ipnetif = avl_find(&ips->ips_avl_by_index, &id, NULL)) != NULL) 1657 ipnetif_refhold(ipnetif); 1658 mutex_exit(&ips->ips_avl_lock); 1659 return (ipnetif); 1660 } 1661 1662 static ipnetif_t * 1663 ipnetif_getby_dev(dev_t dev, ipnet_stack_t *ips) 1664 { 1665 ipnetif_t *ipnetif; 1666 avl_tree_t *tree; 1667 1668 mutex_enter(&ips->ips_avl_lock); 1669 tree = &ips->ips_avl_by_index; 1670 for (ipnetif = avl_first(tree); ipnetif != NULL; 1671 ipnetif = avl_walk(tree, ipnetif, AVL_AFTER)) { 1672 if (ipnetif->if_dev == dev) { 1673 ipnetif_refhold(ipnetif); 1674 break; 1675 } 1676 } 1677 mutex_exit(&ips->ips_avl_lock); 1678 return (ipnetif); 1679 } 1680 1681 static ipnetif_addr_t * 1682 ipnet_match_lif(ipnetif_t *ipnetif, lif_if_t lid, boolean_t isv6) 1683 { 1684 ipnetif_addr_t *ifaddr; 1685 list_t *list; 1686 1687 mutex_enter(&ipnetif->if_addr_lock); 1688 list = isv6 ? 
&ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list; 1689 for (ifaddr = list_head(list); ifaddr != NULL; 1690 ifaddr = list_next(list, ifaddr)) { 1691 if (lid == ifaddr->ifa_id) 1692 break; 1693 } 1694 mutex_exit(&ipnetif->if_addr_lock); 1695 return (ifaddr); 1696 } 1697 1698 /* ARGSUSED */ 1699 static void * 1700 ipnet_stack_init(netstackid_t stackid, netstack_t *ns) 1701 { 1702 ipnet_stack_t *ips; 1703 1704 ips = kmem_zalloc(sizeof (*ips), KM_SLEEP); 1705 ips->ips_netstack = ns; 1706 mutex_init(&ips->ips_avl_lock, NULL, MUTEX_DEFAULT, 0); 1707 avl_create(&ips->ips_avl_by_index, ipnetif_compare_index, 1708 sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_index)); 1709 avl_create(&ips->ips_avl_by_name, ipnetif_compare_name, 1710 sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_name)); 1711 avl_create(&ips->ips_avl_by_shared, ipnetif_compare_name_zone, 1712 sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_shared)); 1713 mutex_init(&ips->ips_walkers_lock, NULL, MUTEX_DEFAULT, NULL); 1714 cv_init(&ips->ips_walkers_cv, NULL, CV_DRIVER, NULL); 1715 list_create(&ips->ips_str_list, sizeof (ipnet_t), 1716 offsetof(ipnet_t, ipnet_next)); 1717 ipnet_register_netihook(ips); 1718 return (ips); 1719 } 1720 1721 /* ARGSUSED */ 1722 static void 1723 ipnet_stack_fini(netstackid_t stackid, void *arg) 1724 { 1725 ipnet_stack_t *ips = arg; 1726 ipnetif_t *ipnetif, *nipnetif; 1727 1728 if (ips->ips_kstatp != NULL) { 1729 zoneid_t zoneid; 1730 1731 zoneid = netstackid_to_zoneid(stackid); 1732 net_kstat_delete(net_zoneidtonetid(zoneid), ips->ips_kstatp); 1733 } 1734 if (ips->ips_ndv4 != NULL) { 1735 VERIFY(net_hook_unregister(ips->ips_ndv4, NH_NIC_EVENTS, 1736 ips->ips_nicevents) == 0); 1737 VERIFY(net_protocol_release(ips->ips_ndv4) == 0); 1738 } 1739 if (ips->ips_ndv6 != NULL) { 1740 VERIFY(net_hook_unregister(ips->ips_ndv6, NH_NIC_EVENTS, 1741 ips->ips_nicevents) == 0); 1742 VERIFY(net_protocol_release(ips->ips_ndv6) == 0); 1743 } 1744 hook_free(ips->ips_nicevents); 1745 1746 
for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL; 1747 ipnetif = nipnetif) { 1748 nipnetif = AVL_NEXT(&ips->ips_avl_by_index, ipnetif); 1749 ipnetif_remove(ipnetif, ips); 1750 } 1751 avl_destroy(&ips->ips_avl_by_shared); 1752 avl_destroy(&ips->ips_avl_by_index); 1753 avl_destroy(&ips->ips_avl_by_name); 1754 mutex_destroy(&ips->ips_avl_lock); 1755 mutex_destroy(&ips->ips_walkers_lock); 1756 cv_destroy(&ips->ips_walkers_cv); 1757 list_destroy(&ips->ips_str_list); 1758 kmem_free(ips, sizeof (*ips)); 1759 } 1760 1761 /* Do any of the addresses in addrlist belong the supplied zoneid? */ 1762 static boolean_t 1763 ipnet_addrs_in_zone(list_t *addrlist, zoneid_t zoneid) 1764 { 1765 ipnetif_addr_t *ifa; 1766 1767 for (ifa = list_head(addrlist); ifa != NULL; 1768 ifa = list_next(addrlist, ifa)) { 1769 if (ifa->ifa_zone == zoneid) 1770 return (B_TRUE); 1771 } 1772 return (B_FALSE); 1773 } 1774 1775 /* Should the supplied ipnetif be visible from the supplied zoneid? */ 1776 static boolean_t 1777 ipnetif_in_zone(ipnetif_t *ipnetif, zoneid_t zoneid, ipnet_stack_t *ips) 1778 { 1779 int ret; 1780 1781 /* 1782 * The global zone has visibility into all interfaces in the global 1783 * stack, and exclusive stack zones have visibility into all 1784 * interfaces in their stack. 1785 */ 1786 if (zoneid == GLOBAL_ZONEID || 1787 ips->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID) 1788 return (B_TRUE); 1789 1790 /* 1791 * Shared-stack zones only have visibility for interfaces that have 1792 * addresses in their zone. 1793 */ 1794 mutex_enter(&ipnetif->if_addr_lock); 1795 ret = ipnet_addrs_in_zone(&ipnetif->if_ip4addr_list, zoneid) || 1796 ipnet_addrs_in_zone(&ipnetif->if_ip6addr_list, zoneid); 1797 mutex_exit(&ipnetif->if_addr_lock); 1798 return (ret); 1799 } 1800 1801 /* 1802 * Verify that any ipnet_t that has a reference to the supplied ipnetif should 1803 * still be allowed to have it open. 
A given ipnet_t may no longer be allowed 1804 * to have an ipnetif open if there are no longer any addresses that belong to 1805 * the ipnetif in the ipnet_t's non-global shared-stack zoneid. If that's the 1806 * case, send the ipnet_t an M_HANGUP. 1807 */ 1808 static void 1809 ipnetif_zonecheck(ipnetif_t *ipnetif, ipnet_stack_t *ips) 1810 { 1811 list_t *strlist = &ips->ips_str_list; 1812 ipnet_t *ipnet; 1813 1814 ipnet_walkers_inc(ips); 1815 for (ipnet = list_head(strlist); ipnet != NULL; 1816 ipnet = list_next(strlist, ipnet)) { 1817 if (ipnet->ipnet_if != ipnetif) 1818 continue; 1819 if (!ipnetif_in_zone(ipnetif, ipnet->ipnet_zoneid, ips)) 1820 (void) putnextctl(ipnet->ipnet_rq, M_HANGUP); 1821 } 1822 ipnet_walkers_dec(ips); 1823 } 1824 1825 void 1826 ipnet_walk_if(ipnet_walkfunc_t *cb, void *arg, zoneid_t zoneid) 1827 { 1828 ipnetif_t *ipnetif; 1829 list_t cbdata; 1830 ipnetif_cbdata_t *cbnode; 1831 netstack_t *ns; 1832 ipnet_stack_t *ips; 1833 1834 /* 1835 * On labeled systems, non-global zones shouldn't see anything 1836 * in /dev/ipnet. 
1837 */ 1838 if (is_system_labeled() && zoneid != GLOBAL_ZONEID) 1839 return; 1840 1841 if ((ns = netstack_find_by_zoneid(zoneid)) == NULL) 1842 return; 1843 1844 ips = ns->netstack_ipnet; 1845 list_create(&cbdata, sizeof (ipnetif_cbdata_t), 1846 offsetof(ipnetif_cbdata_t, ic_next)); 1847 1848 mutex_enter(&ips->ips_avl_lock); 1849 for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL; 1850 ipnetif = avl_walk(&ips->ips_avl_by_index, ipnetif, AVL_AFTER)) { 1851 if (!ipnetif_in_zone(ipnetif, zoneid, ips)) 1852 continue; 1853 cbnode = kmem_zalloc(sizeof (ipnetif_cbdata_t), KM_SLEEP); 1854 (void) strlcpy(cbnode->ic_ifname, ipnetif->if_name, LIFNAMSIZ); 1855 cbnode->ic_dev = ipnetif->if_dev; 1856 list_insert_head(&cbdata, cbnode); 1857 } 1858 mutex_exit(&ips->ips_avl_lock); 1859 1860 while ((cbnode = list_head(&cbdata)) != NULL) { 1861 cb(cbnode->ic_ifname, arg, cbnode->ic_dev); 1862 list_remove(&cbdata, cbnode); 1863 kmem_free(cbnode, sizeof (ipnetif_cbdata_t)); 1864 } 1865 list_destroy(&cbdata); 1866 netstack_rele(ns); 1867 } 1868 1869 static int 1870 ipnetif_compare_index(const void *index_ptr, const void *ipnetifp) 1871 { 1872 int64_t index1 = *((int64_t *)index_ptr); 1873 int64_t index2 = (int64_t)((ipnetif_t *)ipnetifp)->if_index; 1874 1875 return (SIGNOF(index2 - index1)); 1876 } 1877 1878 static int 1879 ipnetif_compare_name(const void *name_ptr, const void *ipnetifp) 1880 { 1881 int res; 1882 1883 res = strcmp(((ipnetif_t *)ipnetifp)->if_name, name_ptr); 1884 return (SIGNOF(res)); 1885 } 1886 1887 static int 1888 ipnetif_compare_name_zone(const void *key_ptr, const void *ipnetifp) 1889 { 1890 const uintptr_t *ptr = key_ptr; 1891 const ipnetif_t *ifp; 1892 int res; 1893 1894 ifp = ipnetifp; 1895 res = ifp->if_zoneid - ptr[0]; 1896 if (res != 0) 1897 return (SIGNOF(res)); 1898 res = strcmp(ifp->if_name, (char *)ptr[1]); 1899 return (SIGNOF(res)); 1900 } 1901 1902 static void 1903 ipnetif_refhold(ipnetif_t *ipnetif) 1904 { 1905 
mutex_enter(&ipnetif->if_reflock); 1906 ipnetif->if_refcnt++; 1907 mutex_exit(&ipnetif->if_reflock); 1908 } 1909 1910 static void 1911 ipnetif_refrele(ipnetif_t *ipnetif) 1912 { 1913 mutex_enter(&ipnetif->if_reflock); 1914 ASSERT(ipnetif->if_refcnt > 0); 1915 if (--ipnetif->if_refcnt == 0) 1916 ipnetif_free(ipnetif); 1917 else 1918 mutex_exit(&ipnetif->if_reflock); 1919 } 1920 1921 static void 1922 ipnet_walkers_inc(ipnet_stack_t *ips) 1923 { 1924 mutex_enter(&ips->ips_walkers_lock); 1925 ips->ips_walkers_cnt++; 1926 mutex_exit(&ips->ips_walkers_lock); 1927 } 1928 1929 static void 1930 ipnet_walkers_dec(ipnet_stack_t *ips) 1931 { 1932 mutex_enter(&ips->ips_walkers_lock); 1933 ASSERT(ips->ips_walkers_cnt != 0); 1934 if (--ips->ips_walkers_cnt == 0) 1935 cv_broadcast(&ips->ips_walkers_cv); 1936 mutex_exit(&ips->ips_walkers_lock); 1937 } 1938 1939 /*ARGSUSED*/ 1940 static int 1941 ipobs_bounce_func(hook_event_token_t token, hook_data_t info, void *arg) 1942 { 1943 hook_pkt_observe_t *hdr; 1944 pfv_t func = (pfv_t)arg; 1945 mblk_t *mp; 1946 1947 hdr = (hook_pkt_observe_t *)info; 1948 /* 1949 * Code in ip_input() expects that it is the only one accessing the 1950 * packet. 1951 */ 1952 mp = copymsg(hdr->hpo_pkt); 1953 if (mp == NULL) { 1954 netstack_t *ns = hdr->hpo_ctx; 1955 ipnet_stack_t *ips = ns->netstack_ipnet; 1956 1957 IPSK_BUMP(ips, ik_dispatchDupDrop); 1958 return (0); 1959 } 1960 1961 hdr = (hook_pkt_observe_t *)mp->b_rptr; 1962 hdr->hpo_pkt = mp; 1963 1964 func(mp); 1965 1966 return (0); 1967 } 1968 1969 hook_t * 1970 ipobs_register_hook(netstack_t *ns, pfv_t func) 1971 { 1972 ip_stack_t *ipst = ns->netstack_ip; 1973 char name[32]; 1974 hook_t *hook; 1975 1976 HOOK_INIT(hook, ipobs_bounce_func, "", (void *)func); 1977 VERIFY(hook != NULL); 1978 1979 /* 1980 * To register multiple hooks with the same callback function, 1981 * a unique name is needed. 
1982 */ 1983 (void) snprintf(name, sizeof (name), "ipobserve_%p", (void *)hook); 1984 hook->h_name = strdup(name); 1985 1986 (void) net_hook_register(ipst->ips_ip4_observe_pr, NH_OBSERVE, hook); 1987 (void) net_hook_register(ipst->ips_ip6_observe_pr, NH_OBSERVE, hook); 1988 1989 return (hook); 1990 } 1991 1992 void 1993 ipobs_unregister_hook(netstack_t *ns, hook_t *hook) 1994 { 1995 ip_stack_t *ipst = ns->netstack_ip; 1996 1997 (void) net_hook_unregister(ipst->ips_ip4_observe_pr, NH_OBSERVE, hook); 1998 1999 (void) net_hook_unregister(ipst->ips_ip6_observe_pr, NH_OBSERVE, hook); 2000 2001 strfree(hook->h_name); 2002 2003 hook_free(hook); 2004 } 2005 2006 /* ******************************************************************** */ 2007 /* BPF Functions below */ 2008 /* ******************************************************************** */ 2009 2010 /* 2011 * Convenience function to make mapping a zoneid to an ipnet_stack_t easy. 2012 */ 2013 ipnet_stack_t * 2014 ipnet_find_by_zoneid(zoneid_t zoneid) 2015 { 2016 netstack_t *ns; 2017 2018 VERIFY((ns = netstack_find_by_zoneid(zoneid)) != NULL); 2019 return (ns->netstack_ipnet); 2020 } 2021 2022 /* 2023 * Functions, such as the above ipnet_find_by_zoneid(), will return a 2024 * pointer to ipnet_stack_t by calling a netstack lookup function. 2025 * The netstack_find_*() functions return a pointer after doing a "hold" 2026 * on the data structure and thereby require a "release" when the caller 2027 * is finished with it. We need to mirror that API here and thus a caller 2028 * of ipnet_find_by_zoneid() is required to call ipnet_rele(). 
2029 */ 2030 void 2031 ipnet_rele(ipnet_stack_t *ips) 2032 { 2033 netstack_rele(ips->ips_netstack); 2034 } 2035 2036 /* 2037 */ 2038 void 2039 ipnet_set_itap(bpf_itap_fn_t tapfunc) 2040 { 2041 ipnet_itap = tapfunc; 2042 } 2043 2044 /* 2045 * The list of interfaces available via ipnet is private for each zone, 2046 * so the AVL tree of each zone must be searched for a given name, even 2047 * if all names are unique. 2048 */ 2049 int 2050 ipnet_open_byname(const char *name, ipnetif_t **ptr, zoneid_t zoneid) 2051 { 2052 ipnet_stack_t *ips; 2053 ipnetif_t *ipnetif; 2054 2055 ASSERT(ptr != NULL); 2056 VERIFY((ips = ipnet_find_by_zoneid(zoneid)) != NULL); 2057 2058 mutex_enter(&ips->ips_avl_lock); 2059 2060 /* 2061 * Shared instance zone? 2062 */ 2063 if (netstackid_to_zoneid(zoneid_to_netstackid(zoneid)) != zoneid) { 2064 uintptr_t key[2] = { zoneid, (uintptr_t)name }; 2065 2066 ipnetif = avl_find(&ips->ips_avl_by_shared, (void *)key, NULL); 2067 } else { 2068 ipnetif = avl_find(&ips->ips_avl_by_name, (void *)name, NULL); 2069 } 2070 if (ipnetif != NULL) 2071 ipnetif_refhold(ipnetif); 2072 mutex_exit(&ips->ips_avl_lock); 2073 2074 *ptr = ipnetif; 2075 ipnet_rele(ips); 2076 2077 if (ipnetif == NULL) 2078 return (ESRCH); 2079 return (0); 2080 } 2081 2082 void 2083 ipnet_close_byhandle(ipnetif_t *ifp) 2084 { 2085 ASSERT(ifp != NULL); 2086 ipnetif_refrele(ifp); 2087 } 2088 2089 const char * 2090 ipnet_name(ipnetif_t *ifp) 2091 { 2092 ASSERT(ifp != NULL); 2093 return (ifp->if_name); 2094 } 2095 2096 /* 2097 * To find the linkid for a given name, it is necessary to know which zone 2098 * the interface name belongs to and to search the avl tree for that zone 2099 * as there is no master list of all interfaces and which zone they belong 2100 * to. It is assumed that the caller of this function is somehow already 2101 * working with the ipnet interfaces and hence the ips_event_lock is held. 
2102 * When BPF calls into this function, it is doing so because of an event 2103 * in ipnet, and thus ipnet holds the ips_event_lock. Thus the datalink id 2104 * value returned has meaning without the need for grabbing a hold on the 2105 * owning structure. 2106 */ 2107 int 2108 ipnet_get_linkid_byname(const char *name, uint_t *idp, zoneid_t zoneid) 2109 { 2110 ipnet_stack_t *ips; 2111 ipnetif_t *ifp; 2112 2113 VERIFY((ips = ipnet_find_by_zoneid(zoneid)) != NULL); 2114 ASSERT(mutex_owned(&ips->ips_event_lock)); 2115 2116 mutex_enter(&ips->ips_avl_lock); 2117 ifp = avl_find(&ips->ips_avl_by_name, (void *)name, NULL); 2118 if (ifp != NULL) 2119 *idp = (uint_t)ifp->if_index; 2120 2121 /* 2122 * Shared instance zone? 2123 */ 2124 if (netstackid_to_zoneid(zoneid_to_netstackid(zoneid)) != zoneid) { 2125 uintptr_t key[2] = { zoneid, (uintptr_t)name }; 2126 2127 ifp = avl_find(&ips->ips_avl_by_shared, (void *)key, NULL); 2128 if (ifp != NULL) 2129 *idp = (uint_t)ifp->if_index; 2130 } 2131 2132 mutex_exit(&ips->ips_avl_lock); 2133 ipnet_rele(ips); 2134 2135 if (ifp == NULL) 2136 return (ESRCH); 2137 return (0); 2138 } 2139 2140 /* 2141 * Strictly speaking, there is no such thing as a "client" in ipnet, like 2142 * there is in mac. BPF only needs to have this because it is required as 2143 * part of interfacing correctly with mac. The reuse of the original 2144 * ipnetif_t as a client poses no danger, so long as it is done with its 2145 * own ref-count'd hold that is given up on close. 2146 */ 2147 int 2148 ipnet_client_open(ipnetif_t *ptr, ipnetif_t **result) 2149 { 2150 ASSERT(ptr != NULL); 2151 ASSERT(result != NULL); 2152 ipnetif_refhold(ptr); 2153 *result = ptr; 2154 2155 return (0); 2156 } 2157 2158 void 2159 ipnet_client_close(ipnetif_t *ptr) 2160 { 2161 ASSERT(ptr != NULL); 2162 ipnetif_refrele(ptr); 2163 } 2164 2165 /* 2166 * This is called from BPF when it needs to start receiving packets 2167 * from ipnet. 
2168 * 2169 * The use of the ipnet_t structure here is somewhat lightweight when 2170 * compared to how it is used elsewhere but it already has all of the 2171 * right fields in it, so reuse here doesn't seem out of order. Its 2172 * primary purpose here is to provide the means to store pointers for 2173 * use when ipnet_promisc_remove() needs to be called. 2174 * 2175 * This should never be called for the IPNET_MINOR_LO device as it is 2176 * never created via ipnetif_create. 2177 */ 2178 /*ARGSUSED*/ 2179 int 2180 ipnet_promisc_add(void *handle, uint_t how, void *data, uintptr_t *mhandle, 2181 int flags) 2182 { 2183 ip_stack_t *ipst; 2184 netstack_t *ns; 2185 ipnetif_t *ifp; 2186 ipnet_t *ipnet; 2187 char name[32]; 2188 int error; 2189 2190 ifp = (ipnetif_t *)handle; 2191 2192 if (how != DL_PROMISC_PHYS && how != DL_PROMISC_MULTI) 2193 return (EINVAL); 2194 2195 ns = netstack_find_by_zoneid(ifp->if_zoneid); 2196 2197 if ((error = ipnet_join_allmulti(ifp, ns->netstack_ipnet)) != 0) { 2198 netstack_rele(ns); 2199 return (error); 2200 } 2201 2202 ipnet = kmem_zalloc(sizeof (*ipnet), KM_SLEEP); 2203 ipnet->ipnet_if = ifp; 2204 ipnet->ipnet_ns = ns; 2205 ipnet->ipnet_flags = flags; 2206 2207 if ((ifp->if_flags & IPNETIF_LOOPBACK) != 0) { 2208 ipnet->ipnet_acceptfn = ipnet_loaccept; 2209 } else { 2210 ipnet->ipnet_acceptfn = ipnet_accept; 2211 } 2212 2213 /* 2214 * To register multiple hooks with the same callback function, 2215 * a unique name is needed. 
2216 */ 2217 HOOK_INIT(ipnet->ipnet_hook, ipnet_bpf_bounce, "", ipnet); 2218 (void) snprintf(name, sizeof (name), "ipnet_promisc_%p", 2219 (void *)ipnet->ipnet_hook); 2220 ipnet->ipnet_hook->h_name = strdup(name); 2221 ipnet->ipnet_data = data; 2222 ipnet->ipnet_zoneid = ifp->if_zoneid; 2223 2224 ipst = ns->netstack_ip; 2225 2226 error = net_hook_register(ipst->ips_ip4_observe_pr, NH_OBSERVE, 2227 ipnet->ipnet_hook); 2228 if (error != 0) 2229 goto regfail; 2230 2231 error = net_hook_register(ipst->ips_ip6_observe_pr, NH_OBSERVE, 2232 ipnet->ipnet_hook); 2233 if (error != 0) { 2234 (void) net_hook_unregister(ipst->ips_ip4_observe_pr, 2235 NH_OBSERVE, ipnet->ipnet_hook); 2236 goto regfail; 2237 } 2238 2239 *mhandle = (uintptr_t)ipnet; 2240 netstack_rele(ns); 2241 2242 return (0); 2243 2244 regfail: 2245 cmn_err(CE_WARN, "net_hook_register failed: %d", error); 2246 strfree(ipnet->ipnet_hook->h_name); 2247 hook_free(ipnet->ipnet_hook); 2248 netstack_rele(ns); 2249 return (error); 2250 } 2251 2252 void 2253 ipnet_promisc_remove(void *data) 2254 { 2255 ip_stack_t *ipst; 2256 ipnet_t *ipnet; 2257 hook_t *hook; 2258 2259 ipnet = data; 2260 ipst = ipnet->ipnet_ns->netstack_ip; 2261 hook = ipnet->ipnet_hook; 2262 2263 VERIFY(net_hook_unregister(ipst->ips_ip4_observe_pr, NH_OBSERVE, 2264 hook) == 0); 2265 2266 VERIFY(net_hook_unregister(ipst->ips_ip6_observe_pr, NH_OBSERVE, 2267 hook) == 0); 2268 2269 strfree(hook->h_name); 2270 2271 hook_free(hook); 2272 2273 kmem_free(ipnet, sizeof (*ipnet)); 2274 } 2275 2276 /* 2277 * arg here comes from the ipnet_t allocated in ipnet_promisc_add. 2278 * An important field from that structure is "ipnet_data" that 2279 * contains the "data" pointer passed into ipnet_promisc_add: it needs 2280 * to be passed back to bpf when we call into ipnet_itap. 2281 * 2282 * ipnet_itap is set by ipnet_set_bpfattach, which in turn is called 2283 * from BPF. 
2284 */ 2285 /*ARGSUSED*/ 2286 static int 2287 ipnet_bpf_bounce(hook_event_token_t token, hook_data_t info, void *arg) 2288 { 2289 hook_pkt_observe_t *hdr; 2290 ipnet_addrp_t src; 2291 ipnet_addrp_t dst; 2292 ipnet_stack_t *ips; 2293 ipnet_t *ipnet; 2294 mblk_t *netmp; 2295 mblk_t *mp; 2296 2297 hdr = (hook_pkt_observe_t *)info; 2298 mp = hdr->hpo_pkt; 2299 ipnet = (ipnet_t *)arg; 2300 ips = ((netstack_t *)hdr->hpo_ctx)->netstack_ipnet; 2301 2302 netmp = hdr->hpo_pkt->b_cont; 2303 src.iap_family = hdr->hpo_family; 2304 dst.iap_family = hdr->hpo_family; 2305 2306 if (hdr->hpo_family == AF_INET) { 2307 src.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_src; 2308 dst.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_dst; 2309 } else { 2310 src.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_src; 2311 dst.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_dst; 2312 } 2313 2314 if (!(*ipnet->ipnet_acceptfn)(ipnet, hdr, &src, &dst)) { 2315 IPSK_BUMP(ips, ik_acceptFail); 2316 return (0); 2317 } 2318 IPSK_BUMP(ips, ik_acceptOk); 2319 2320 ipnet_itap(ipnet->ipnet_data, mp, 2321 hdr->hpo_htype == htons(IPOBS_HOOK_OUTBOUND), 2322 ntohl(hdr->hpo_pktlen) + MBLKL(mp)); 2323 2324 return (0); 2325 } 2326 2327 /* 2328 * clone'd ipnetif_t's are created when a shared IP instance zone comes 2329 * to life and configures an IP address. The model that BPF uses is that 2330 * each interface must have a unique pointer and each interface must be 2331 * representative of what it can capture. They are limited to one DLT 2332 * per interface and one zone per interface. Thus every interface that 2333 * can be seen in a zone must be announced via an attach to bpf. For 2334 * shared instance zones, this means the ipnet driver needs to detect 2335 * when an address is added to an interface in a zone for the first 2336 * time (and also when the last address is removed.) 
2337 */ 2338 static ipnetif_t * 2339 ipnetif_clone_create(ipnetif_t *ifp, zoneid_t zoneid) 2340 { 2341 uintptr_t key[2] = { zoneid, (uintptr_t)ifp->if_name }; 2342 ipnet_stack_t *ips = ifp->if_stackp; 2343 avl_index_t where = 0; 2344 ipnetif_t *newif; 2345 2346 mutex_enter(&ips->ips_avl_lock); 2347 newif = avl_find(&ips->ips_avl_by_shared, (void *)key, &where); 2348 if (newif != NULL) { 2349 ipnetif_refhold(newif); 2350 newif->if_sharecnt++; 2351 mutex_exit(&ips->ips_avl_lock); 2352 return (newif); 2353 } 2354 2355 newif = ipnet_alloc_if(ips); 2356 if (newif == NULL) { 2357 mutex_exit(&ips->ips_avl_lock); 2358 return (NULL); 2359 } 2360 2361 newif->if_refcnt = 1; 2362 newif->if_sharecnt = 1; 2363 newif->if_zoneid = zoneid; 2364 (void) strlcpy(newif->if_name, ifp->if_name, LIFNAMSIZ); 2365 newif->if_flags = ifp->if_flags & IPNETIF_LOOPBACK; 2366 newif->if_index = ifp->if_index; 2367 2368 avl_insert(&ips->ips_avl_by_shared, newif, where); 2369 mutex_exit(&ips->ips_avl_lock); 2370 2371 return (newif); 2372 } 2373 2374 static void 2375 ipnetif_clone_release(ipnetif_t *ipnetif) 2376 { 2377 boolean_t dofree = B_FALSE; 2378 boolean_t doremove = B_FALSE; 2379 ipnet_stack_t *ips = ipnetif->if_stackp; 2380 2381 mutex_enter(&ipnetif->if_reflock); 2382 ASSERT(ipnetif->if_refcnt > 0); 2383 if (--ipnetif->if_refcnt == 0) 2384 dofree = B_TRUE; 2385 ASSERT(ipnetif->if_sharecnt > 0); 2386 if (--ipnetif->if_sharecnt == 0) 2387 doremove = B_TRUE; 2388 mutex_exit(&ipnetif->if_reflock); 2389 if (doremove) { 2390 mutex_enter(&ips->ips_avl_lock); 2391 avl_remove(&ips->ips_avl_by_shared, ipnetif); 2392 mutex_exit(&ips->ips_avl_lock); 2393 } 2394 if (dofree) { 2395 ASSERT(ipnetif->if_sharecnt == 0); 2396 ipnetif_free(ipnetif); 2397 } 2398 } 2399