/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * The ipnet device defined here provides access to packets at the IP layer. To
 * provide access to packets at this layer it registers a callback function in
 * the ip module and when there are open instances of the device ip will pass
 * packets into the device. Packets from ip are passed on the input, output and
 * loopback paths. Internally the module returns to ip as soon as possible by
 * deferring processing using a taskq.
 *
 * Management of the devices in /dev/ipnet/ is handled by the devname
 * filesystem and use of the neti interfaces.  This module registers for NIC
 * events using the neti framework so that when IP interfaces are brought up,
 * taken down etc. the ipnet module is notified and its view of the interfaces
 * configured on the system adjusted.  On attach, the module gets an initial
 * view of the system again using the neti framework but as it has already
 * registered for IP interface events, it is still up-to-date with any changes.
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/cred.h>
#include <sys/stat.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/dlpi.h>
#include <sys/strsun.h>
#include <sys/id_space.h>
#include <sys/kmem.h>
#include <sys/mkdev.h>
#include <sys/neti.h>
#include <net/if.h>
#include <sys/errno.h>
#include <sys/list.h>
#include <sys/ksynch.h>
#include <sys/hook_event.h>
#include <sys/sdt.h>
#include <sys/stropts.h>
#include <sys/sysmacros.h>
#include <inet/ip.h>
#include <inet/ip_if.h>
#include <inet/ip_multi.h>
#include <inet/ip6.h>
#include <inet/ipnet.h>
#include <net/bpf.h>
#include <net/bpfdesc.h>
#include <net/dlt.h>

/*
 * STREAMS module information; referenced by both the read-side and the
 * write-side qinit structures below.
 */
static struct module_info ipnet_minfo = {
	1,		/* mi_idnum */
	"ipnet",	/* mi_idname */
	0,		/* mi_minpsz */
	INFPSZ,		/* mi_maxpsz */
	2048,		/* mi_hiwat */
	0		/* mi_lowat */
};

/*
 * List to hold static view of ipnetif_t's on the system. This is needed to
 * avoid holding the lock protecting the avl tree of ipnetif's over the
 * callback into the dev filesystem.
 */
typedef struct ipnetif_cbdata {
	char		ic_ifname[LIFNAMSIZ];	/* interface name */
	dev_t		ic_dev;			/* device number */
	list_node_t	ic_next;		/* list linkage */
} ipnetif_cbdata_t;

/*
 * Convenience enumerated type for ipnet_accept().  It describes the
 * properties of a given ipnet_addrp_t relative to a single ipnet_t
 * client stream.  The values represent whether the address is ...
 */
typedef enum {
	IPNETADDR_MYADDR,	/* an address on my ipnetif_t. */
	IPNETADDR_MBCAST,	/* a multicast or broadcast address. */
	IPNETADDR_UNKNOWN	/* none of the above. */
} ipnet_addrtype_t;

/* Argument used for the ipnet_nicevent_taskq callback. */
typedef struct ipnet_nicevent_s {
	nic_event_t		ipne_event;
	net_handle_t		ipne_protocol;
	netstackid_t		ipne_stackid;
	uint64_t		ipne_ifindex;
	uint64_t		ipne_lifindex;
	char			ipne_ifname[LIFNAMSIZ];
} ipnet_nicevent_t;

/* Driver-global state. */
static dev_info_t	*ipnet_dip;		/* set in attach, cleared in detach */
static major_t		ipnet_major;		/* looked up in _init() */
static ddi_taskq_t	*ipnet_taskq;		/* taskq for packets */
static ddi_taskq_t	*ipnet_nicevent_taskq;	/* taskq for NIC events */
static id_space_t	*ipnet_minor_space;	/* minors for open streams */
static const int	IPNET_MINOR_LO = 1;	/* minor number for /dev/lo0 */
static const int	IPNET_MINOR_MIN = 2;	/* start of dynamic minors */
static dl_info_ack_t	ipnet_infoack = IPNET_INFO_ACK_INIT;
static ipnet_acceptfn_t	ipnet_accept, ipnet_loaccept;
static bpf_itap_fn_t	ipnet_itap;

/* Forward declarations for the functions defined in this file. */
static void	ipnet_input(mblk_t *);
static int	ipnet_wput(queue_t *, mblk_t *);
static int	ipnet_rsrv(queue_t *);
static int	ipnet_open(queue_t *, dev_t *, int, int, cred_t *);
static int	ipnet_close(queue_t *);
static void	ipnet_ioctl(queue_t *, mblk_t *);
static void	ipnet_iocdata(queue_t *, mblk_t *);
static void	ipnet_wputnondata(queue_t *, mblk_t *);
static int	ipnet_attach(dev_info_t *, ddi_attach_cmd_t);
static int	ipnet_detach(dev_info_t *, ddi_detach_cmd_t);
static int	ipnet_devinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
static void	ipnet_inforeq(queue_t *q, mblk_t *mp);
static void	ipnet_bindreq(queue_t *q, mblk_t *mp);
static void	ipnet_unbindreq(queue_t *q, mblk_t *mp);
static void	ipnet_dlpromisconreq(queue_t *q, mblk_t *mp);
static void	ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp);
static int	ipnet_join_allmulti(ipnetif_t *, ipnet_stack_t *);
static void	ipnet_leave_allmulti(ipnetif_t *, ipnet_stack_t *);
static int	ipnet_nicevent_cb(hook_event_token_t, hook_data_t, void *);
static void	ipnet_nicevent_task(void *);
static ipnetif_t *ipnetif_create(const char *, uint64_t, ipnet_stack_t *,
    uint64_t);
static void	ipnetif_remove(ipnetif_t *, ipnet_stack_t *);
static ipnetif_addr_t *ipnet_match_lif(ipnetif_t *, lif_if_t, boolean_t);
static ipnetif_t *ipnetif_getby_index(uint64_t, ipnet_stack_t *);
static ipnetif_t *ipnetif_getby_dev(dev_t, ipnet_stack_t *);
static boolean_t ipnetif_in_zone(ipnetif_t *, zoneid_t, ipnet_stack_t *);
static void	ipnetif_zonecheck(ipnetif_t *, ipnet_stack_t *);
static int	ipnet_populate_if(net_handle_t, ipnet_stack_t *, boolean_t);
static int	ipnetif_compare_name(const void *, const void *);
static int	ipnetif_compare_name_zone(const void *, const void *);
static int	ipnetif_compare_index(const void *, const void *);
static void	ipnet_add_ifaddr(uint64_t, ipnetif_t *, net_handle_t);
static void	ipnet_delete_ifaddr(ipnetif_addr_t *, ipnetif_t *, boolean_t);
static void	ipnetif_refhold(ipnetif_t *);
static void	ipnetif_refrele(ipnetif_t *);
static void	ipnet_walkers_inc(ipnet_stack_t *);
static void	ipnet_walkers_dec(ipnet_stack_t *);
static void	ipnet_register_netihook(ipnet_stack_t *);
static void	*ipnet_stack_init(netstackid_t, netstack_t *);
static void	ipnet_stack_fini(netstackid_t, void *);
static void	ipnet_dispatch(void *);
static int	ipobs_bounce_func(hook_event_token_t, hook_data_t, void *);
static int	ipnet_bpf_bounce(hook_event_token_t, hook_data_t, void *);
static ipnetif_t *ipnetif_clone_create(ipnetif_t *, zoneid_t);
static void	ipnetif_clone_release(ipnetif_t *);

/* Read-side queue: service procedure plus the open/close entry points. */
static struct qinit ipnet_rinit = {
	NULL,		/* qi_putp */
	ipnet_rsrv,	/* qi_srvp */
	ipnet_open,	/* qi_qopen */
	ipnet_close,	/* qi_qclose */
	NULL,		/* qi_qadmin */
	&ipnet_minfo,	/* qi_minfo */
};

/* Write-side queue: put procedure only. */
static struct qinit ipnet_winit = {
	ipnet_wput,	/* qi_putp */
	NULL,		/* qi_srvp */
	NULL,		/* qi_qopen */
	NULL,		/* qi_qclose */
	NULL,		/* qi_qadmin */
	&ipnet_minfo,	/* qi_minfo */
};

static struct streamtab ipnet_info = {
	&ipnet_rinit, &ipnet_winit
};

DDI_DEFINE_STREAM_OPS(ipnet_ops, nulldev, nulldev, ipnet_attach,
    ipnet_detach, nodev, ipnet_devinfo, D_MP | D_MTPERMOD, &ipnet_info,
    ddi_quiesce_not_supported);

static struct modldrv modldrv = {
	&mod_driverops,
	"STREAMS ipnet driver",
	&ipnet_ops
};

/* Linkage handed to mod_install(), mod_remove() and mod_info(). */
static struct modlinkage modlinkage = {
	MODREV_1, &modldrv, NULL
};

/*
 * This structure contains the template data (names and type) that is
 * copied, in bulk, into the new kstats structure created by net_kstat_create.
 * No actual statistical information is stored in this instance of the
 * ipnet_kstats_t structure.
 */
static ipnet_kstats_t stats_template = {
	{ "duplicationFail",	KSTAT_DATA_UINT64 },
	{ "dispatchOk",		KSTAT_DATA_UINT64 },
	{ "dispatchFail",	KSTAT_DATA_UINT64 },
	{ "dispatchHeaderDrop",	KSTAT_DATA_UINT64 },
	{ "dispatchDupDrop",	KSTAT_DATA_UINT64 },
	{ "dispatchPutDrop",	KSTAT_DATA_UINT64 },
	{ "dispatchDeliver",	KSTAT_DATA_UINT64 },
	{ "acceptOk",		KSTAT_DATA_UINT64 },
	{ "acceptFail",		KSTAT_DATA_UINT64 }
};

/*
 * Walk the list of physical interfaces on the machine, for each
 * interface create a new ipnetif_t and add any addresses to it. We
 * need to do the walk twice, once for IPv4 and once for IPv6.
 *
 * The interfaces are destroyed as part of ipnet_stack_fini() for each
 * stack.  Note that we cannot do this initialization in
 * ipnet_stack_init(), since ipnet_stack_init() cannot fail.
235 */ 236 static int 237 ipnetif_init(void) 238 { 239 netstack_handle_t nh; 240 netstack_t *ns; 241 ipnet_stack_t *ips; 242 int ret = 0; 243 244 netstack_next_init(&nh); 245 while ((ns = netstack_next(&nh)) != NULL) { 246 ips = ns->netstack_ipnet; 247 if ((ret = ipnet_populate_if(ips->ips_ndv4, ips, B_FALSE)) == 0) 248 ret = ipnet_populate_if(ips->ips_ndv6, ips, B_TRUE); 249 netstack_rele(ns); 250 if (ret != 0) 251 break; 252 } 253 netstack_next_fini(&nh); 254 return (ret); 255 } 256 257 /* 258 * Standard module entry points. 259 */ 260 int 261 _init(void) 262 { 263 int ret; 264 boolean_t netstack_registered = B_FALSE; 265 266 if ((ipnet_major = ddi_name_to_major("ipnet")) == (major_t)-1) 267 return (ENODEV); 268 ipnet_minor_space = id_space_create("ipnet_minor_space", 269 IPNET_MINOR_MIN, MAXMIN32); 270 271 /* 272 * We call ddi_taskq_create() with nthread == 1 to ensure in-order 273 * delivery of packets to clients. Note that we need to create the 274 * taskqs before calling netstack_register() since ipnet_stack_init() 275 * registers callbacks that use 'em. 
276 */ 277 ipnet_taskq = ddi_taskq_create(NULL, "ipnet", 1, TASKQ_DEFAULTPRI, 0); 278 ipnet_nicevent_taskq = ddi_taskq_create(NULL, "ipnet_nic_event_queue", 279 1, TASKQ_DEFAULTPRI, 0); 280 if (ipnet_taskq == NULL || ipnet_nicevent_taskq == NULL) { 281 ret = ENOMEM; 282 goto done; 283 } 284 285 netstack_register(NS_IPNET, ipnet_stack_init, NULL, ipnet_stack_fini); 286 netstack_registered = B_TRUE; 287 288 if ((ret = ipnetif_init()) == 0) 289 ret = mod_install(&modlinkage); 290 done: 291 if (ret != 0) { 292 if (ipnet_taskq != NULL) 293 ddi_taskq_destroy(ipnet_taskq); 294 if (ipnet_nicevent_taskq != NULL) 295 ddi_taskq_destroy(ipnet_nicevent_taskq); 296 if (netstack_registered) 297 netstack_unregister(NS_IPNET); 298 id_space_destroy(ipnet_minor_space); 299 } 300 return (ret); 301 } 302 303 int 304 _fini(void) 305 { 306 int err; 307 308 if ((err = mod_remove(&modlinkage)) != 0) 309 return (err); 310 311 netstack_unregister(NS_IPNET); 312 ddi_taskq_destroy(ipnet_nicevent_taskq); 313 ddi_taskq_destroy(ipnet_taskq); 314 id_space_destroy(ipnet_minor_space); 315 return (0); 316 } 317 318 int 319 _info(struct modinfo *modinfop) 320 { 321 return (mod_info(&modlinkage, modinfop)); 322 } 323 324 static void 325 ipnet_register_netihook(ipnet_stack_t *ips) 326 { 327 int ret; 328 zoneid_t zoneid; 329 netid_t netid; 330 331 HOOK_INIT(ips->ips_nicevents, ipnet_nicevent_cb, "ipnet_nicevents", 332 ips); 333 334 /* 335 * It is possible for an exclusive stack to be in the process of 336 * shutting down here, and the netid and protocol lookups could fail 337 * in that case. 
338 */ 339 zoneid = netstackid_to_zoneid(ips->ips_netstack->netstack_stackid); 340 if ((netid = net_zoneidtonetid(zoneid)) == -1) 341 return; 342 343 if ((ips->ips_ndv4 = net_protocol_lookup(netid, NHF_INET)) != NULL) { 344 if ((ret = net_hook_register(ips->ips_ndv4, NH_NIC_EVENTS, 345 ips->ips_nicevents)) != 0) { 346 VERIFY(net_protocol_release(ips->ips_ndv4) == 0); 347 ips->ips_ndv4 = NULL; 348 cmn_err(CE_WARN, "unable to register IPv4 netinfo hooks" 349 " in zone %d: %d", zoneid, ret); 350 } 351 } 352 if ((ips->ips_ndv6 = net_protocol_lookup(netid, NHF_INET6)) != NULL) { 353 if ((ret = net_hook_register(ips->ips_ndv6, NH_NIC_EVENTS, 354 ips->ips_nicevents)) != 0) { 355 VERIFY(net_protocol_release(ips->ips_ndv6) == 0); 356 ips->ips_ndv6 = NULL; 357 cmn_err(CE_WARN, "unable to register IPv6 netinfo hooks" 358 " in zone %d: %d", zoneid, ret); 359 } 360 } 361 362 /* 363 * Create a local set of kstats for each zone. 364 */ 365 ips->ips_kstatp = net_kstat_create(netid, "ipnet", 0, "ipnet_stats", 366 "misc", KSTAT_TYPE_NAMED, 367 sizeof (ipnet_kstats_t) / sizeof (kstat_named_t), 0); 368 if (ips->ips_kstatp != NULL) { 369 bcopy(&stats_template, &ips->ips_stats, 370 sizeof (ips->ips_stats)); 371 ips->ips_kstatp->ks_data = &ips->ips_stats; 372 ips->ips_kstatp->ks_private = 373 (void *)(uintptr_t)ips->ips_netstack->netstack_stackid; 374 kstat_install(ips->ips_kstatp); 375 } else { 376 cmn_err(CE_WARN, "net_kstat_create(%s,%s,%s) failed", 377 "ipnet", "ipnet_stats", "misc"); 378 } 379 } 380 381 /* 382 * This function is called on attach to build an initial view of the 383 * interfaces on the system. It will be called once for IPv4 and once 384 * for IPv6, although there is only one ipnet interface for both IPv4 385 * and IPv6 there are separate address lists. 
386 */ 387 static int 388 ipnet_populate_if(net_handle_t nd, ipnet_stack_t *ips, boolean_t isv6) 389 { 390 phy_if_t phyif; 391 lif_if_t lif; 392 ipnetif_t *ipnetif; 393 char name[LIFNAMSIZ]; 394 boolean_t new_if = B_FALSE; 395 uint64_t ifflags; 396 int ret = 0; 397 398 /* 399 * If ipnet_register_netihook() was unable to initialize this 400 * stack's net_handle_t, then we cannot populate any interface 401 * information. This usually happens when we attempted to 402 * grab a net_handle_t as a stack was shutting down. We don't 403 * want to fail the entire _init() operation because of a 404 * stack shutdown (other stacks will continue to work just 405 * fine), so we silently return success here. 406 */ 407 if (nd == NULL) 408 return (0); 409 410 /* 411 * Make sure we're not processing NIC events during the 412 * population of our interfaces and address lists. 413 */ 414 mutex_enter(&ips->ips_event_lock); 415 416 for (phyif = net_phygetnext(nd, 0); phyif != 0; 417 phyif = net_phygetnext(nd, phyif)) { 418 if (net_getifname(nd, phyif, name, LIFNAMSIZ) != 0) 419 continue; 420 ifflags = 0; 421 (void) net_getlifflags(nd, phyif, 0, &ifflags); 422 if ((ipnetif = ipnetif_getby_index(phyif, ips)) == NULL) { 423 ipnetif = ipnetif_create(name, phyif, ips, ifflags); 424 if (ipnetif == NULL) { 425 ret = ENOMEM; 426 goto done; 427 } 428 new_if = B_TRUE; 429 } 430 ipnetif->if_flags |= 431 isv6 ? IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED; 432 433 for (lif = net_lifgetnext(nd, phyif, 0); lif != 0; 434 lif = net_lifgetnext(nd, phyif, lif)) { 435 /* 436 * Skip addresses that aren't up. We'll add 437 * them when we receive an NE_LIF_UP event. 438 */ 439 if (net_getlifflags(nd, phyif, lif, &ifflags) != 0 || 440 !(ifflags & IFF_UP)) 441 continue; 442 /* Don't add it if we already have it. 
*/ 443 if (ipnet_match_lif(ipnetif, lif, isv6) != NULL) 444 continue; 445 ipnet_add_ifaddr(lif, ipnetif, nd); 446 } 447 if (!new_if) 448 ipnetif_refrele(ipnetif); 449 } 450 451 done: 452 mutex_exit(&ips->ips_event_lock); 453 return (ret); 454 } 455 456 static int 457 ipnet_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 458 { 459 if (cmd != DDI_ATTACH) 460 return (DDI_FAILURE); 461 462 if (ddi_create_minor_node(dip, "lo0", S_IFCHR, IPNET_MINOR_LO, 463 DDI_PSEUDO, 0) == DDI_FAILURE) 464 return (DDI_FAILURE); 465 466 ipnet_dip = dip; 467 return (DDI_SUCCESS); 468 } 469 470 static int 471 ipnet_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 472 { 473 if (cmd != DDI_DETACH) 474 return (DDI_FAILURE); 475 476 ASSERT(dip == ipnet_dip); 477 ddi_remove_minor_node(ipnet_dip, NULL); 478 ipnet_dip = NULL; 479 return (DDI_SUCCESS); 480 } 481 482 /* ARGSUSED */ 483 static int 484 ipnet_devinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 485 { 486 int error = DDI_FAILURE; 487 488 switch (infocmd) { 489 case DDI_INFO_DEVT2INSTANCE: 490 *result = (void *)0; 491 error = DDI_SUCCESS; 492 break; 493 case DDI_INFO_DEVT2DEVINFO: 494 if (ipnet_dip != NULL) { 495 *result = ipnet_dip; 496 error = DDI_SUCCESS; 497 } 498 break; 499 } 500 return (error); 501 } 502 503 /* ARGSUSED */ 504 static int 505 ipnet_open(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp) 506 { 507 ipnet_t *ipnet; 508 netstack_t *ns = NULL; 509 ipnet_stack_t *ips; 510 int err = 0; 511 zoneid_t zoneid = crgetzoneid(crp); 512 513 /* 514 * If the system is labeled, only the global zone is allowed to open 515 * IP observability nodes. 516 */ 517 if (is_system_labeled() && zoneid != GLOBAL_ZONEID) 518 return (EACCES); 519 520 /* We don't support open as a module */ 521 if (sflag & MODOPEN) 522 return (ENOTSUP); 523 524 /* This driver is self-cloning, we don't support re-open. 
*/ 525 if (rq->q_ptr != NULL) 526 return (EBUSY); 527 528 if ((ipnet = kmem_zalloc(sizeof (*ipnet), KM_NOSLEEP)) == NULL) 529 return (ENOMEM); 530 531 VERIFY((ns = netstack_find_by_cred(crp)) != NULL); 532 ips = ns->netstack_ipnet; 533 534 rq->q_ptr = WR(rq)->q_ptr = ipnet; 535 ipnet->ipnet_rq = rq; 536 ipnet->ipnet_minor = (minor_t)id_alloc(ipnet_minor_space); 537 ipnet->ipnet_zoneid = zoneid; 538 ipnet->ipnet_dlstate = DL_UNBOUND; 539 ipnet->ipnet_ns = ns; 540 541 /* 542 * We need to hold ips_event_lock here as any NE_LIF_DOWN events need 543 * to be processed after ipnet_if is set and the ipnet_t has been 544 * inserted in the ips_str_list. 545 */ 546 mutex_enter(&ips->ips_event_lock); 547 if (getminor(*dev) == IPNET_MINOR_LO) { 548 ipnet->ipnet_flags |= IPNET_LOMODE; 549 ipnet->ipnet_acceptfn = ipnet_loaccept; 550 } else { 551 ipnet->ipnet_acceptfn = ipnet_accept; 552 ipnet->ipnet_if = ipnetif_getby_dev(*dev, ips); 553 if (ipnet->ipnet_if == NULL || 554 !ipnetif_in_zone(ipnet->ipnet_if, zoneid, ips)) { 555 err = ENODEV; 556 goto done; 557 } 558 } 559 560 mutex_enter(&ips->ips_walkers_lock); 561 while (ips->ips_walkers_cnt != 0) 562 cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock); 563 list_insert_head(&ips->ips_str_list, ipnet); 564 *dev = makedevice(getmajor(*dev), ipnet->ipnet_minor); 565 qprocson(rq); 566 567 /* 568 * Only register our callback if we're the first open client; we call 569 * unregister in close() for the last open client. 
570 */ 571 if (list_head(&ips->ips_str_list) == list_tail(&ips->ips_str_list)) 572 ips->ips_hook = ipobs_register_hook(ns, ipnet_input); 573 mutex_exit(&ips->ips_walkers_lock); 574 575 done: 576 mutex_exit(&ips->ips_event_lock); 577 if (err != 0) { 578 netstack_rele(ns); 579 id_free(ipnet_minor_space, ipnet->ipnet_minor); 580 if (ipnet->ipnet_if != NULL) 581 ipnetif_refrele(ipnet->ipnet_if); 582 kmem_free(ipnet, sizeof (*ipnet)); 583 } 584 return (err); 585 } 586 587 static int 588 ipnet_close(queue_t *rq) 589 { 590 ipnet_t *ipnet = rq->q_ptr; 591 ipnet_stack_t *ips = ipnet->ipnet_ns->netstack_ipnet; 592 593 if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS) 594 ipnet_leave_allmulti(ipnet->ipnet_if, ips); 595 if (ipnet->ipnet_flags & IPNET_PROMISC_MULTI) 596 ipnet_leave_allmulti(ipnet->ipnet_if, ips); 597 598 mutex_enter(&ips->ips_walkers_lock); 599 while (ips->ips_walkers_cnt != 0) 600 cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock); 601 602 qprocsoff(rq); 603 604 list_remove(&ips->ips_str_list, ipnet); 605 if (ipnet->ipnet_if != NULL) 606 ipnetif_refrele(ipnet->ipnet_if); 607 id_free(ipnet_minor_space, ipnet->ipnet_minor); 608 609 if (list_is_empty(&ips->ips_str_list)) { 610 ipobs_unregister_hook(ips->ips_netstack, ips->ips_hook); 611 ips->ips_hook = NULL; 612 } 613 614 kmem_free(ipnet, sizeof (*ipnet)); 615 616 mutex_exit(&ips->ips_walkers_lock); 617 netstack_rele(ips->ips_netstack); 618 return (0); 619 } 620 621 static int 622 ipnet_wput(queue_t *q, mblk_t *mp) 623 { 624 switch (mp->b_datap->db_type) { 625 case M_FLUSH: 626 if (*mp->b_rptr & FLUSHW) { 627 flushq(q, FLUSHDATA); 628 *mp->b_rptr &= ~FLUSHW; 629 } 630 if (*mp->b_rptr & FLUSHR) 631 qreply(q, mp); 632 else 633 freemsg(mp); 634 break; 635 case M_PROTO: 636 case M_PCPROTO: 637 ipnet_wputnondata(q, mp); 638 break; 639 case M_IOCTL: 640 ipnet_ioctl(q, mp); 641 break; 642 case M_IOCDATA: 643 ipnet_iocdata(q, mp); 644 break; 645 default: 646 freemsg(mp); 647 break; 648 } 649 return (0); 650 } 651 652 
static int 653 ipnet_rsrv(queue_t *q) 654 { 655 mblk_t *mp; 656 657 while ((mp = getq(q)) != NULL) { 658 ASSERT(DB_TYPE(mp) == M_DATA); 659 if (canputnext(q)) { 660 putnext(q, mp); 661 } else { 662 (void) putbq(q, mp); 663 break; 664 } 665 } 666 return (0); 667 } 668 669 static void 670 ipnet_ioctl(queue_t *q, mblk_t *mp) 671 { 672 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 673 674 switch (iocp->ioc_cmd) { 675 case DLIOCRAW: 676 miocack(q, mp, 0, 0); 677 break; 678 case DLIOCIPNETINFO: 679 if (iocp->ioc_count == TRANSPARENT) { 680 mcopyin(mp, NULL, sizeof (uint_t), NULL); 681 qreply(q, mp); 682 break; 683 } 684 /* Fallthrough, we don't support I_STR with DLIOCIPNETINFO. */ 685 default: 686 miocnak(q, mp, 0, EINVAL); 687 break; 688 } 689 } 690 691 static void 692 ipnet_iocdata(queue_t *q, mblk_t *mp) 693 { 694 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 695 ipnet_t *ipnet = q->q_ptr; 696 697 switch (iocp->ioc_cmd) { 698 case DLIOCIPNETINFO: 699 if (*(int *)mp->b_cont->b_rptr == 1) 700 ipnet->ipnet_flags |= IPNET_INFO; 701 else if (*(int *)mp->b_cont->b_rptr == 0) 702 ipnet->ipnet_flags &= ~IPNET_INFO; 703 else 704 goto iocnak; 705 miocack(q, mp, 0, DL_IPNETINFO_VERSION); 706 break; 707 default: 708 iocnak: 709 miocnak(q, mp, 0, EINVAL); 710 break; 711 } 712 } 713 714 static void 715 ipnet_wputnondata(queue_t *q, mblk_t *mp) 716 { 717 union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr; 718 t_uscalar_t prim = dlp->dl_primitive; 719 720 switch (prim) { 721 case DL_INFO_REQ: 722 ipnet_inforeq(q, mp); 723 break; 724 case DL_UNBIND_REQ: 725 ipnet_unbindreq(q, mp); 726 break; 727 case DL_BIND_REQ: 728 ipnet_bindreq(q, mp); 729 break; 730 case DL_PROMISCON_REQ: 731 ipnet_dlpromisconreq(q, mp); 732 break; 733 case DL_PROMISCOFF_REQ: 734 ipnet_dlpromiscoffreq(q, mp); 735 break; 736 case DL_UNITDATA_REQ: 737 case DL_DETACH_REQ: 738 case DL_PHYS_ADDR_REQ: 739 case DL_SET_PHYS_ADDR_REQ: 740 case DL_ENABMULTI_REQ: 741 case DL_DISABMULTI_REQ: 742 case 
DL_ATTACH_REQ: 743 dlerrorack(q, mp, prim, DL_UNSUPPORTED, 0); 744 break; 745 default: 746 dlerrorack(q, mp, prim, DL_BADPRIM, 0); 747 break; 748 } 749 } 750 751 static void 752 ipnet_inforeq(queue_t *q, mblk_t *mp) 753 { 754 dl_info_ack_t *dlip; 755 size_t size = sizeof (dl_info_ack_t) + sizeof (ushort_t); 756 757 if (MBLKL(mp) < DL_INFO_REQ_SIZE) { 758 dlerrorack(q, mp, DL_INFO_REQ, DL_BADPRIM, 0); 759 return; 760 } 761 762 if ((mp = mexchange(q, mp, size, M_PCPROTO, DL_INFO_ACK)) == NULL) 763 return; 764 765 dlip = (dl_info_ack_t *)mp->b_rptr; 766 *dlip = ipnet_infoack; 767 qreply(q, mp); 768 } 769 770 static void 771 ipnet_bindreq(queue_t *q, mblk_t *mp) 772 { 773 union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr; 774 ipnet_t *ipnet = q->q_ptr; 775 776 if (MBLKL(mp) < DL_BIND_REQ_SIZE) { 777 dlerrorack(q, mp, DL_BIND_REQ, DL_BADPRIM, 0); 778 return; 779 } 780 781 switch (dlp->bind_req.dl_sap) { 782 case 0 : 783 ipnet->ipnet_family = AF_UNSPEC; 784 break; 785 case IPV4_VERSION : 786 ipnet->ipnet_family = AF_INET; 787 break; 788 case IPV6_VERSION : 789 ipnet->ipnet_family = AF_INET6; 790 break; 791 default : 792 dlerrorack(q, mp, DL_BIND_REQ, DL_BADSAP, 0); 793 return; 794 /*NOTREACHED*/ 795 } 796 797 ipnet->ipnet_dlstate = DL_IDLE; 798 dlbindack(q, mp, dlp->bind_req.dl_sap, 0, 0, 0, 0); 799 } 800 801 static void 802 ipnet_unbindreq(queue_t *q, mblk_t *mp) 803 { 804 ipnet_t *ipnet = q->q_ptr; 805 806 if (MBLKL(mp) < DL_UNBIND_REQ_SIZE) { 807 dlerrorack(q, mp, DL_UNBIND_REQ, DL_BADPRIM, 0); 808 return; 809 } 810 811 if (ipnet->ipnet_dlstate != DL_IDLE) { 812 dlerrorack(q, mp, DL_UNBIND_REQ, DL_OUTSTATE, 0); 813 } else { 814 ipnet->ipnet_dlstate = DL_UNBOUND; 815 ipnet->ipnet_family = AF_UNSPEC; 816 dlokack(q, mp, DL_UNBIND_REQ); 817 } 818 } 819 820 static void 821 ipnet_dlpromisconreq(queue_t *q, mblk_t *mp) 822 { 823 ipnet_t *ipnet = q->q_ptr; 824 t_uscalar_t level; 825 int err; 826 827 if (MBLKL(mp) < DL_PROMISCON_REQ_SIZE) { 828 dlerrorack(q, mp, 
DL_PROMISCON_REQ, DL_BADPRIM, 0); 829 return; 830 } 831 832 if (ipnet->ipnet_flags & IPNET_LOMODE) { 833 dlokack(q, mp, DL_PROMISCON_REQ); 834 return; 835 } 836 837 level = ((dl_promiscon_req_t *)mp->b_rptr)->dl_level; 838 if (level == DL_PROMISC_PHYS || level == DL_PROMISC_MULTI) { 839 if ((err = ipnet_join_allmulti(ipnet->ipnet_if, 840 ipnet->ipnet_ns->netstack_ipnet)) != 0) { 841 dlerrorack(q, mp, DL_PROMISCON_REQ, DL_SYSERR, err); 842 return; 843 } 844 } 845 846 switch (level) { 847 case DL_PROMISC_PHYS: 848 ipnet->ipnet_flags |= IPNET_PROMISC_PHYS; 849 break; 850 case DL_PROMISC_SAP: 851 ipnet->ipnet_flags |= IPNET_PROMISC_SAP; 852 break; 853 case DL_PROMISC_MULTI: 854 ipnet->ipnet_flags |= IPNET_PROMISC_MULTI; 855 break; 856 default: 857 dlerrorack(q, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0); 858 return; 859 } 860 861 dlokack(q, mp, DL_PROMISCON_REQ); 862 } 863 864 static void 865 ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp) 866 { 867 ipnet_t *ipnet = q->q_ptr; 868 t_uscalar_t level; 869 uint16_t orig_ipnet_flags = ipnet->ipnet_flags; 870 871 if (MBLKL(mp) < DL_PROMISCOFF_REQ_SIZE) { 872 dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0); 873 return; 874 } 875 876 if (ipnet->ipnet_flags & IPNET_LOMODE) { 877 dlokack(q, mp, DL_PROMISCOFF_REQ); 878 return; 879 } 880 881 level = ((dl_promiscon_req_t *)mp->b_rptr)->dl_level; 882 switch (level) { 883 case DL_PROMISC_PHYS: 884 if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS) 885 ipnet->ipnet_flags &= ~IPNET_PROMISC_PHYS; 886 break; 887 case DL_PROMISC_SAP: 888 if (ipnet->ipnet_flags & IPNET_PROMISC_SAP) 889 ipnet->ipnet_flags &= ~IPNET_PROMISC_SAP; 890 break; 891 case DL_PROMISC_MULTI: 892 if (ipnet->ipnet_flags & IPNET_PROMISC_MULTI) 893 ipnet->ipnet_flags &= ~IPNET_PROMISC_MULTI; 894 break; 895 default: 896 dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0); 897 return; 898 } 899 900 if (orig_ipnet_flags == ipnet->ipnet_flags) { 901 dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_NOTENAB, 0); 902 return; 903 } 904 905 if 
(level == DL_PROMISC_PHYS || level == DL_PROMISC_MULTI) { 906 ipnet_leave_allmulti(ipnet->ipnet_if, 907 ipnet->ipnet_ns->netstack_ipnet); 908 } 909 910 dlokack(q, mp, DL_PROMISCOFF_REQ); 911 } 912 913 static int 914 ipnet_join_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips) 915 { 916 int err = 0; 917 ip_stack_t *ipst = ips->ips_netstack->netstack_ip; 918 uint64_t index = ipnetif->if_index; 919 920 mutex_enter(&ips->ips_event_lock); 921 if (ipnetif->if_multicnt == 0) { 922 ASSERT((ipnetif->if_flags & 923 (IPNETIF_IPV4ALLMULTI | IPNETIF_IPV6ALLMULTI)) == 0); 924 if (ipnetif->if_flags & IPNETIF_IPV4PLUMBED) { 925 err = ip_join_allmulti(index, B_FALSE, ipst); 926 if (err != 0) 927 goto done; 928 ipnetif->if_flags |= IPNETIF_IPV4ALLMULTI; 929 } 930 if (ipnetif->if_flags & IPNETIF_IPV6PLUMBED) { 931 err = ip_join_allmulti(index, B_TRUE, ipst); 932 if (err != 0 && 933 (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI)) { 934 (void) ip_leave_allmulti(index, B_FALSE, ipst); 935 ipnetif->if_flags &= ~IPNETIF_IPV4ALLMULTI; 936 goto done; 937 } 938 ipnetif->if_flags |= IPNETIF_IPV6ALLMULTI; 939 } 940 } 941 ipnetif->if_multicnt++; 942 943 done: 944 mutex_exit(&ips->ips_event_lock); 945 return (err); 946 } 947 948 static void 949 ipnet_leave_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips) 950 { 951 int err; 952 ip_stack_t *ipst = ips->ips_netstack->netstack_ip; 953 uint64_t index = ipnetif->if_index; 954 955 mutex_enter(&ips->ips_event_lock); 956 ASSERT(ipnetif->if_multicnt != 0); 957 if (--ipnetif->if_multicnt == 0) { 958 if (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI) { 959 err = ip_leave_allmulti(index, B_FALSE, ipst); 960 ASSERT(err == 0 || err == ENODEV); 961 ipnetif->if_flags &= ~IPNETIF_IPV4ALLMULTI; 962 } 963 if (ipnetif->if_flags & IPNETIF_IPV6ALLMULTI) { 964 err = ip_leave_allmulti(index, B_TRUE, ipst); 965 ASSERT(err == 0 || err == ENODEV); 966 ipnetif->if_flags &= ~IPNETIF_IPV6ALLMULTI; 967 } 968 } 969 mutex_exit(&ips->ips_event_lock); 970 } 971 972 /* 973 * Allocate a new 
mblk_t and put a dl_ipnetinfo_t in it. 974 * The structure it copies the header information from, 975 * hook_pkt_observe_t, is constructed using network byte 976 * order in ipobs_hook(), so there is no conversion here. 977 */ 978 static mblk_t * 979 ipnet_addheader(hook_pkt_observe_t *hdr, mblk_t *mp) 980 { 981 mblk_t *dlhdr; 982 dl_ipnetinfo_t *dl; 983 984 if ((dlhdr = allocb(sizeof (dl_ipnetinfo_t), BPRI_HI)) == NULL) { 985 freemsg(mp); 986 return (NULL); 987 } 988 dl = (dl_ipnetinfo_t *)dlhdr->b_rptr; 989 dl->dli_version = DL_IPNETINFO_VERSION; 990 dl->dli_family = hdr->hpo_family; 991 dl->dli_htype = hdr->hpo_htype; 992 dl->dli_pktlen = hdr->hpo_pktlen; 993 dl->dli_ifindex = hdr->hpo_ifindex; 994 dl->dli_grifindex = hdr->hpo_grifindex; 995 dl->dli_zsrc = hdr->hpo_zsrc; 996 dl->dli_zdst = hdr->hpo_zdst; 997 dlhdr->b_wptr += sizeof (*dl); 998 dlhdr->b_cont = mp; 999 1000 return (dlhdr); 1001 } 1002 1003 static ipnet_addrtype_t 1004 ipnet_get_addrtype(ipnet_t *ipnet, ipnet_addrp_t *addr) 1005 { 1006 list_t *list; 1007 ipnetif_t *ipnetif = ipnet->ipnet_if; 1008 ipnetif_addr_t *ifaddr; 1009 ipnet_addrtype_t addrtype = IPNETADDR_UNKNOWN; 1010 1011 /* First check if the address is multicast or limited broadcast. */ 1012 switch (addr->iap_family) { 1013 case AF_INET: 1014 if (CLASSD(*(addr->iap_addr4)) || 1015 *(addr->iap_addr4) == INADDR_BROADCAST) 1016 return (IPNETADDR_MBCAST); 1017 break; 1018 case AF_INET6: 1019 if (IN6_IS_ADDR_MULTICAST(addr->iap_addr6)) 1020 return (IPNETADDR_MBCAST); 1021 break; 1022 } 1023 1024 /* 1025 * Walk the address list to see if the address belongs to our 1026 * interface or is one of our subnet broadcast addresses. 1027 */ 1028 mutex_enter(&ipnetif->if_addr_lock); 1029 list = (addr->iap_family == AF_INET) ? 
1030 &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list; 1031 for (ifaddr = list_head(list); 1032 ifaddr != NULL && addrtype == IPNETADDR_UNKNOWN; 1033 ifaddr = list_next(list, ifaddr)) { 1034 /* 1035 * If we're not in the global zone, then only look at 1036 * addresses in our zone. 1037 */ 1038 if (ipnet->ipnet_zoneid != GLOBAL_ZONEID && 1039 ipnet->ipnet_zoneid != ifaddr->ifa_zone) 1040 continue; 1041 switch (addr->iap_family) { 1042 case AF_INET: 1043 if (ifaddr->ifa_ip4addr != INADDR_ANY && 1044 *(addr->iap_addr4) == ifaddr->ifa_ip4addr) 1045 addrtype = IPNETADDR_MYADDR; 1046 else if (ifaddr->ifa_brdaddr != INADDR_ANY && 1047 *(addr->iap_addr4) == ifaddr->ifa_brdaddr) 1048 addrtype = IPNETADDR_MBCAST; 1049 break; 1050 case AF_INET6: 1051 if (IN6_ARE_ADDR_EQUAL(addr->iap_addr6, 1052 &ifaddr->ifa_ip6addr)) 1053 addrtype = IPNETADDR_MYADDR; 1054 break; 1055 } 1056 } 1057 mutex_exit(&ipnetif->if_addr_lock); 1058 1059 return (addrtype); 1060 } 1061 1062 /* 1063 * Verify if the packet contained in hdr should be passed up to the 1064 * ipnet client stream. 1065 */ 1066 static boolean_t 1067 ipnet_accept(ipnet_t *ipnet, hook_pkt_observe_t *hdr, ipnet_addrp_t *src, 1068 ipnet_addrp_t *dst) 1069 { 1070 boolean_t obsif; 1071 uint64_t ifindex = ipnet->ipnet_if->if_index; 1072 ipnet_addrtype_t srctype; 1073 ipnet_addrtype_t dsttype; 1074 1075 srctype = ipnet_get_addrtype(ipnet, src); 1076 dsttype = ipnet_get_addrtype(ipnet, dst); 1077 1078 /* 1079 * If the packet's ifindex matches ours, or the packet's group ifindex 1080 * matches ours, it's on the interface we're observing. (Thus, 1081 * observing on the group ifindex matches all ifindexes in the group.) 
1082 */ 1083 obsif = (ntohl(hdr->hpo_ifindex) == ifindex || 1084 ntohl(hdr->hpo_grifindex) == ifindex); 1085 1086 DTRACE_PROBE5(ipnet_accept__addr, 1087 ipnet_addrtype_t, srctype, ipnet_addrp_t *, src, 1088 ipnet_addrtype_t, dsttype, ipnet_addrp_t *, dst, 1089 boolean_t, obsif); 1090 1091 /* 1092 * Do not allow an ipnet stream to see packets that are not from or to 1093 * its zone. The exception is when zones are using the shared stack 1094 * model. In this case, streams in the global zone have visibility 1095 * into other shared-stack zones, and broadcast and multicast traffic 1096 * is visible by all zones in the stack. 1097 */ 1098 if (ipnet->ipnet_zoneid != GLOBAL_ZONEID && 1099 dsttype != IPNETADDR_MBCAST) { 1100 if (ipnet->ipnet_zoneid != ntohl(hdr->hpo_zsrc) && 1101 ipnet->ipnet_zoneid != ntohl(hdr->hpo_zdst)) 1102 return (B_FALSE); 1103 } 1104 1105 /* 1106 * If DL_PROMISC_SAP isn't enabled, then the bound SAP must match the 1107 * packet's IP version. 1108 */ 1109 if (!(ipnet->ipnet_flags & IPNET_PROMISC_SAP) && 1110 ipnet->ipnet_family != hdr->hpo_family) 1111 return (B_FALSE); 1112 1113 /* If the destination address is ours, then accept the packet. */ 1114 if (dsttype == IPNETADDR_MYADDR) 1115 return (B_TRUE); 1116 1117 /* 1118 * If DL_PROMISC_PHYS is enabled, then we can see all packets that are 1119 * sent or received on the interface we're observing, or packets that 1120 * have our source address (this allows us to see packets we send). 1121 */ 1122 if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS) { 1123 if (srctype == IPNETADDR_MYADDR || obsif) 1124 return (B_TRUE); 1125 } 1126 1127 /* 1128 * We accept multicast and broadcast packets transmitted or received 1129 * on the interface we're observing. 1130 */ 1131 if (dsttype == IPNETADDR_MBCAST && obsif) 1132 return (B_TRUE); 1133 1134 return (B_FALSE); 1135 } 1136 1137 /* 1138 * Verify if the packet contained in hdr should be passed up to the ipnet 1139 * client stream that's in IPNET_LOMODE. 
1140 */ 1141 /* ARGSUSED */ 1142 static boolean_t 1143 ipnet_loaccept(ipnet_t *ipnet, hook_pkt_observe_t *hdr, ipnet_addrp_t *src, 1144 ipnet_addrp_t *dst) 1145 { 1146 if (hdr->hpo_htype != htons(IPOBS_HOOK_LOCAL)) { 1147 /* 1148 * ipnet_if is only NULL for IPNET_MINOR_LO devices. 1149 */ 1150 if (ipnet->ipnet_if == NULL) 1151 return (B_FALSE); 1152 } 1153 1154 /* 1155 * An ipnet stream must not see packets that are not from/to its zone. 1156 */ 1157 if (ipnet->ipnet_zoneid != GLOBAL_ZONEID) { 1158 if (ipnet->ipnet_zoneid != ntohl(hdr->hpo_zsrc) && 1159 ipnet->ipnet_zoneid != ntohl(hdr->hpo_zdst)) 1160 return (B_FALSE); 1161 } 1162 1163 return (ipnet->ipnet_family == AF_UNSPEC || 1164 ipnet->ipnet_family == hdr->hpo_family); 1165 } 1166 1167 static void 1168 ipnet_dispatch(void *arg) 1169 { 1170 mblk_t *mp = arg; 1171 hook_pkt_observe_t *hdr = (hook_pkt_observe_t *)mp->b_rptr; 1172 ipnet_t *ipnet; 1173 mblk_t *netmp; 1174 list_t *list; 1175 ipnet_stack_t *ips; 1176 ipnet_addrp_t src; 1177 ipnet_addrp_t dst; 1178 1179 ips = ((netstack_t *)hdr->hpo_ctx)->netstack_ipnet; 1180 1181 netmp = hdr->hpo_pkt->b_cont; 1182 src.iap_family = hdr->hpo_family; 1183 dst.iap_family = hdr->hpo_family; 1184 1185 if (hdr->hpo_family == AF_INET) { 1186 src.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_src; 1187 dst.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_dst; 1188 } else { 1189 src.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_src; 1190 dst.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_dst; 1191 } 1192 1193 ipnet_walkers_inc(ips); 1194 1195 list = &ips->ips_str_list; 1196 for (ipnet = list_head(list); ipnet != NULL; 1197 ipnet = list_next(list, ipnet)) { 1198 if (!(*ipnet->ipnet_acceptfn)(ipnet, hdr, &src, &dst)) { 1199 IPSK_BUMP(ips, ik_acceptFail); 1200 continue; 1201 } 1202 IPSK_BUMP(ips, ik_acceptOk); 1203 1204 if (list_next(list, ipnet) == NULL) { 1205 netmp = hdr->hpo_pkt->b_cont; 1206 hdr->hpo_pkt->b_cont = NULL; 1207 } else { 1208 if ((netmp = 
dupmsg(hdr->hpo_pkt->b_cont)) == NULL && 1209 (netmp = copymsg(hdr->hpo_pkt->b_cont)) == NULL) { 1210 IPSK_BUMP(ips, ik_duplicationFail); 1211 continue; 1212 } 1213 } 1214 1215 if (ipnet->ipnet_flags & IPNET_INFO) { 1216 if ((netmp = ipnet_addheader(hdr, netmp)) == NULL) { 1217 IPSK_BUMP(ips, ik_dispatchHeaderDrop); 1218 continue; 1219 } 1220 } 1221 1222 if (ipnet->ipnet_rq->q_first == NULL && 1223 canputnext(ipnet->ipnet_rq)) { 1224 putnext(ipnet->ipnet_rq, netmp); 1225 IPSK_BUMP(ips, ik_dispatchDeliver); 1226 } else if (canput(ipnet->ipnet_rq)) { 1227 (void) putq(ipnet->ipnet_rq, netmp); 1228 IPSK_BUMP(ips, ik_dispatchDeliver); 1229 } else { 1230 freemsg(netmp); 1231 IPSK_BUMP(ips, ik_dispatchPutDrop); 1232 } 1233 } 1234 1235 ipnet_walkers_dec(ips); 1236 1237 freemsg(mp); 1238 } 1239 1240 static void 1241 ipnet_input(mblk_t *mp) 1242 { 1243 hook_pkt_observe_t *hdr = (hook_pkt_observe_t *)mp->b_rptr; 1244 ipnet_stack_t *ips; 1245 1246 ips = ((netstack_t *)hdr->hpo_ctx)->netstack_ipnet; 1247 1248 if (ddi_taskq_dispatch(ipnet_taskq, ipnet_dispatch, mp, DDI_NOSLEEP) != 1249 DDI_SUCCESS) { 1250 IPSK_BUMP(ips, ik_dispatchFail); 1251 freemsg(mp); 1252 } else { 1253 IPSK_BUMP(ips, ik_dispatchOk); 1254 } 1255 } 1256 1257 static ipnetif_t * 1258 ipnet_alloc_if(ipnet_stack_t *ips) 1259 { 1260 ipnetif_t *ipnetif; 1261 1262 if ((ipnetif = kmem_zalloc(sizeof (*ipnetif), KM_NOSLEEP)) == NULL) 1263 return (NULL); 1264 1265 mutex_init(&ipnetif->if_addr_lock, NULL, MUTEX_DEFAULT, 0); 1266 list_create(&ipnetif->if_ip4addr_list, sizeof (ipnetif_addr_t), 1267 offsetof(ipnetif_addr_t, ifa_link)); 1268 list_create(&ipnetif->if_ip6addr_list, sizeof (ipnetif_addr_t), 1269 offsetof(ipnetif_addr_t, ifa_link)); 1270 mutex_init(&ipnetif->if_reflock, NULL, MUTEX_DEFAULT, 0); 1271 1272 ipnetif->if_stackp = ips; 1273 1274 return (ipnetif); 1275 } 1276 1277 /* 1278 * Create a new ipnetif_t and new minor node for it. 
If creation is 1279 * successful the new ipnetif_t is inserted into an avl_tree 1280 * containing ipnetif's for this stack instance. 1281 */ 1282 static ipnetif_t * 1283 ipnetif_create(const char *name, uint64_t index, ipnet_stack_t *ips, 1284 uint64_t ifflags) 1285 { 1286 ipnetif_t *ipnetif; 1287 avl_index_t where = 0; 1288 minor_t ifminor; 1289 1290 /* 1291 * Because ipnetif_create() can be called from a NIC event 1292 * callback, it should not block. 1293 */ 1294 ifminor = (minor_t)id_alloc_nosleep(ipnet_minor_space); 1295 if (ifminor == (minor_t)-1) 1296 return (NULL); 1297 if ((ipnetif = ipnet_alloc_if(ips)) == NULL) { 1298 id_free(ipnet_minor_space, ifminor); 1299 return (NULL); 1300 } 1301 1302 (void) strlcpy(ipnetif->if_name, name, LIFNAMSIZ); 1303 ipnetif->if_index = (uint_t)index; 1304 ipnetif->if_zoneid = netstack_get_zoneid(ips->ips_netstack); 1305 ipnetif->if_dev = makedevice(ipnet_major, ifminor); 1306 1307 ipnetif->if_refcnt = 1; 1308 if ((ifflags & IFF_LOOPBACK) != 0) 1309 ipnetif->if_flags = IPNETIF_LOOPBACK; 1310 1311 mutex_enter(&ips->ips_avl_lock); 1312 VERIFY(avl_find(&ips->ips_avl_by_index, &index, &where) == NULL); 1313 avl_insert(&ips->ips_avl_by_index, ipnetif, where); 1314 VERIFY(avl_find(&ips->ips_avl_by_name, (void *)name, &where) == NULL); 1315 avl_insert(&ips->ips_avl_by_name, ipnetif, where); 1316 mutex_exit(&ips->ips_avl_lock); 1317 1318 return (ipnetif); 1319 } 1320 1321 static void 1322 ipnetif_remove(ipnetif_t *ipnetif, ipnet_stack_t *ips) 1323 { 1324 ipnet_t *ipnet; 1325 1326 ipnet_walkers_inc(ips); 1327 /* Send a SIGHUP to all open streams associated with this ipnetif. 
*/ 1328 for (ipnet = list_head(&ips->ips_str_list); ipnet != NULL; 1329 ipnet = list_next(&ips->ips_str_list, ipnet)) { 1330 if (ipnet->ipnet_if == ipnetif) 1331 (void) putnextctl(ipnet->ipnet_rq, M_HANGUP); 1332 } 1333 ipnet_walkers_dec(ips); 1334 mutex_enter(&ips->ips_avl_lock); 1335 avl_remove(&ips->ips_avl_by_index, ipnetif); 1336 avl_remove(&ips->ips_avl_by_name, ipnetif); 1337 mutex_exit(&ips->ips_avl_lock); 1338 /* 1339 * Release the reference we implicitly held in ipnetif_create(). 1340 */ 1341 ipnetif_refrele(ipnetif); 1342 } 1343 1344 static void 1345 ipnet_purge_addrlist(list_t *addrlist) 1346 { 1347 ipnetif_addr_t *ifa; 1348 1349 while ((ifa = list_head(addrlist)) != NULL) { 1350 list_remove(addrlist, ifa); 1351 if (ifa->ifa_shared != NULL) 1352 ipnetif_clone_release(ifa->ifa_shared); 1353 kmem_free(ifa, sizeof (*ifa)); 1354 } 1355 } 1356 1357 static void 1358 ipnetif_free(ipnetif_t *ipnetif) 1359 { 1360 ASSERT(ipnetif->if_refcnt == 0); 1361 ASSERT(ipnetif->if_sharecnt == 0); 1362 1363 /* Remove IPv4/v6 address lists from the ipnetif */ 1364 ipnet_purge_addrlist(&ipnetif->if_ip4addr_list); 1365 list_destroy(&ipnetif->if_ip4addr_list); 1366 ipnet_purge_addrlist(&ipnetif->if_ip6addr_list); 1367 list_destroy(&ipnetif->if_ip6addr_list); 1368 mutex_destroy(&ipnetif->if_addr_lock); 1369 mutex_destroy(&ipnetif->if_reflock); 1370 if (ipnetif->if_dev != 0) 1371 id_free(ipnet_minor_space, getminor(ipnetif->if_dev)); 1372 kmem_free(ipnetif, sizeof (*ipnetif)); 1373 } 1374 1375 /* 1376 * Create an ipnetif_addr_t with the given logical interface id (lif) 1377 * and add it to the supplied ipnetif. The lif is the netinfo 1378 * representation of logical interface id, and we use this id to match 1379 * incoming netinfo events against our lists of addresses. 
1380 */ 1381 static void 1382 ipnet_add_ifaddr(uint64_t lif, ipnetif_t *ipnetif, net_handle_t nd) 1383 { 1384 ipnetif_addr_t *ifaddr; 1385 zoneid_t zoneid; 1386 struct sockaddr_in bcast; 1387 struct sockaddr_storage addr; 1388 net_ifaddr_t type = NA_ADDRESS; 1389 uint64_t phyif = ipnetif->if_index; 1390 1391 if (net_getlifaddr(nd, phyif, lif, 1, &type, &addr) != 0 || 1392 net_getlifzone(nd, phyif, lif, &zoneid) != 0) 1393 return; 1394 1395 if ((ifaddr = kmem_alloc(sizeof (*ifaddr), KM_NOSLEEP)) == NULL) 1396 return; 1397 ifaddr->ifa_zone = zoneid; 1398 ifaddr->ifa_id = lif; 1399 ifaddr->ifa_shared = NULL; 1400 1401 switch (addr.ss_family) { 1402 case AF_INET: 1403 ifaddr->ifa_ip4addr = 1404 ((struct sockaddr_in *)&addr)->sin_addr.s_addr; 1405 /* 1406 * Try and get the broadcast address. Note that it's okay for 1407 * an interface to not have a broadcast address, so we don't 1408 * fail the entire operation if net_getlifaddr() fails here. 1409 */ 1410 type = NA_BROADCAST; 1411 if (net_getlifaddr(nd, phyif, lif, 1, &type, &bcast) == 0) 1412 ifaddr->ifa_brdaddr = bcast.sin_addr.s_addr; 1413 break; 1414 case AF_INET6: 1415 ifaddr->ifa_ip6addr = ((struct sockaddr_in6 *)&addr)->sin6_addr; 1416 break; 1417 } 1418 1419 /* 1420 * The zoneid stored in ipnetif_t needs to correspond to the actual 1421 * zone the address is being used in. This facilitates finding the 1422 * correct netstack_t pointer, amongst other things, later. 1423 */ 1424 if (zoneid == ALL_ZONES) 1425 zoneid = GLOBAL_ZONEID; 1426 1427 mutex_enter(&ipnetif->if_addr_lock); 1428 if (zoneid != ipnetif->if_zoneid) { 1429 ipnetif_t *ifp2; 1430 1431 ifp2 = ipnetif_clone_create(ipnetif, zoneid); 1432 ifaddr->ifa_shared = ifp2; 1433 } 1434 list_insert_tail(addr.ss_family == AF_INET ? 
1435 &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list, ifaddr); 1436 mutex_exit(&ipnetif->if_addr_lock); 1437 } 1438 1439 static void 1440 ipnet_delete_ifaddr(ipnetif_addr_t *ifaddr, ipnetif_t *ipnetif, boolean_t isv6) 1441 { 1442 mutex_enter(&ipnetif->if_addr_lock); 1443 if (ifaddr->ifa_shared != NULL) 1444 ipnetif_clone_release(ifaddr->ifa_shared); 1445 1446 list_remove(isv6 ? 1447 &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list, ifaddr); 1448 mutex_exit(&ipnetif->if_addr_lock); 1449 kmem_free(ifaddr, sizeof (*ifaddr)); 1450 } 1451 1452 static void 1453 ipnet_plumb_ev(ipnet_nicevent_t *ipne, ipnet_stack_t *ips, boolean_t isv6) 1454 { 1455 ipnetif_t *ipnetif; 1456 boolean_t refrele_needed = B_TRUE; 1457 uint64_t ifflags; 1458 uint64_t ifindex; 1459 char *ifname; 1460 1461 ifflags = 0; 1462 ifname = ipne->ipne_ifname; 1463 ifindex = ipne->ipne_ifindex; 1464 1465 (void) net_getlifflags(ipne->ipne_protocol, ifindex, 0, &ifflags); 1466 1467 if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL) { 1468 ipnetif = ipnetif_create(ifname, ifindex, ips, ifflags); 1469 refrele_needed = B_FALSE; 1470 } 1471 if (ipnetif != NULL) { 1472 ipnetif->if_flags |= 1473 isv6 ? IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED; 1474 } 1475 1476 if (ipnetif->if_multicnt != 0) { 1477 if (ip_join_allmulti(ifindex, isv6, 1478 ips->ips_netstack->netstack_ip) == 0) { 1479 ipnetif->if_flags |= 1480 isv6 ? IPNETIF_IPV6ALLMULTI : IPNETIF_IPV4ALLMULTI; 1481 } 1482 } 1483 1484 if (refrele_needed) 1485 ipnetif_refrele(ipnetif); 1486 } 1487 1488 static void 1489 ipnet_unplumb_ev(uint64_t ifindex, ipnet_stack_t *ips, boolean_t isv6) 1490 { 1491 ipnetif_t *ipnetif; 1492 1493 if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL) 1494 return; 1495 1496 mutex_enter(&ipnetif->if_addr_lock); 1497 ipnet_purge_addrlist(isv6 ? 
1498 &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list); 1499 mutex_exit(&ipnetif->if_addr_lock); 1500 1501 /* 1502 * Note that we have one ipnetif for both IPv4 and IPv6, but we receive 1503 * separate NE_UNPLUMB events for IPv4 and IPv6. We remove the ipnetif 1504 * if both IPv4 and IPv6 interfaces have been unplumbed. 1505 */ 1506 ipnetif->if_flags &= isv6 ? ~IPNETIF_IPV6PLUMBED : ~IPNETIF_IPV4PLUMBED; 1507 if (!(ipnetif->if_flags & (IPNETIF_IPV4PLUMBED | IPNETIF_IPV6PLUMBED))) 1508 ipnetif_remove(ipnetif, ips); 1509 ipnetif_refrele(ipnetif); 1510 } 1511 1512 static void 1513 ipnet_lifup_ev(uint64_t ifindex, uint64_t lifindex, net_handle_t nd, 1514 ipnet_stack_t *ips, boolean_t isv6) 1515 { 1516 ipnetif_t *ipnetif; 1517 ipnetif_addr_t *ifaddr; 1518 1519 if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL) 1520 return; 1521 if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL) { 1522 /* 1523 * We must have missed a NE_LIF_DOWN event. Delete this 1524 * ifaddr and re-create it. 1525 */ 1526 ipnet_delete_ifaddr(ifaddr, ipnetif, isv6); 1527 } 1528 1529 ipnet_add_ifaddr(lifindex, ipnetif, nd); 1530 ipnetif_refrele(ipnetif); 1531 } 1532 1533 static void 1534 ipnet_lifdown_ev(uint64_t ifindex, uint64_t lifindex, ipnet_stack_t *ips, 1535 boolean_t isv6) 1536 { 1537 ipnetif_t *ipnetif; 1538 ipnetif_addr_t *ifaddr; 1539 1540 if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL) 1541 return; 1542 if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL) 1543 ipnet_delete_ifaddr(ifaddr, ipnetif, isv6); 1544 ipnetif_refrele(ipnetif); 1545 /* 1546 * Make sure that open streams on this ipnetif are still allowed to 1547 * have it open. 1548 */ 1549 ipnetif_zonecheck(ipnetif, ips); 1550 } 1551 1552 /* 1553 * This callback from the NIC event framework dispatches a taskq as the event 1554 * handlers may block. 
1555 */ 1556 /* ARGSUSED */ 1557 static int 1558 ipnet_nicevent_cb(hook_event_token_t token, hook_data_t info, void *arg) 1559 { 1560 ipnet_stack_t *ips = arg; 1561 hook_nic_event_t *hn = (hook_nic_event_t *)info; 1562 ipnet_nicevent_t *ipne; 1563 1564 if ((ipne = kmem_alloc(sizeof (ipnet_nicevent_t), KM_NOSLEEP)) == NULL) 1565 return (0); 1566 ipne->ipne_event = hn->hne_event; 1567 ipne->ipne_protocol = hn->hne_protocol; 1568 ipne->ipne_stackid = ips->ips_netstack->netstack_stackid; 1569 ipne->ipne_ifindex = hn->hne_nic; 1570 ipne->ipne_lifindex = hn->hne_lif; 1571 if (hn->hne_datalen != 0) { 1572 (void) strlcpy(ipne->ipne_ifname, hn->hne_data, 1573 sizeof (ipne->ipne_ifname)); 1574 } 1575 (void) ddi_taskq_dispatch(ipnet_nicevent_taskq, ipnet_nicevent_task, 1576 ipne, DDI_NOSLEEP); 1577 return (0); 1578 } 1579 1580 static void 1581 ipnet_nicevent_task(void *arg) 1582 { 1583 ipnet_nicevent_t *ipne = arg; 1584 netstack_t *ns; 1585 ipnet_stack_t *ips; 1586 boolean_t isv6; 1587 1588 if ((ns = netstack_find_by_stackid(ipne->ipne_stackid)) == NULL) 1589 goto done; 1590 ips = ns->netstack_ipnet; 1591 isv6 = (ipne->ipne_protocol == ips->ips_ndv6); 1592 1593 mutex_enter(&ips->ips_event_lock); 1594 switch (ipne->ipne_event) { 1595 case NE_PLUMB: 1596 ipnet_plumb_ev(ipne, ips, isv6); 1597 break; 1598 case NE_UNPLUMB: 1599 ipnet_unplumb_ev(ipne->ipne_ifindex, ips, isv6); 1600 break; 1601 case NE_LIF_UP: 1602 ipnet_lifup_ev(ipne->ipne_ifindex, ipne->ipne_lifindex, 1603 ipne->ipne_protocol, ips, isv6); 1604 break; 1605 case NE_LIF_DOWN: 1606 ipnet_lifdown_ev(ipne->ipne_ifindex, ipne->ipne_lifindex, ips, 1607 isv6); 1608 break; 1609 default: 1610 break; 1611 } 1612 mutex_exit(&ips->ips_event_lock); 1613 done: 1614 if (ns != NULL) 1615 netstack_rele(ns); 1616 kmem_free(ipne, sizeof (ipnet_nicevent_t)); 1617 } 1618 1619 dev_t 1620 ipnet_if_getdev(char *name, zoneid_t zoneid) 1621 { 1622 netstack_t *ns; 1623 ipnet_stack_t *ips; 1624 ipnetif_t *ipnetif; 1625 dev_t dev = (dev_t)-1; 
1626 1627 if (is_system_labeled() && zoneid != GLOBAL_ZONEID) 1628 return (dev); 1629 if ((ns = netstack_find_by_zoneid(zoneid)) == NULL) 1630 return (dev); 1631 1632 ips = ns->netstack_ipnet; 1633 mutex_enter(&ips->ips_avl_lock); 1634 if ((ipnetif = avl_find(&ips->ips_avl_by_name, name, NULL)) != NULL) { 1635 if (ipnetif_in_zone(ipnetif, zoneid, ips)) 1636 dev = ipnetif->if_dev; 1637 } 1638 mutex_exit(&ips->ips_avl_lock); 1639 netstack_rele(ns); 1640 1641 return (dev); 1642 } 1643 1644 static ipnetif_t * 1645 ipnetif_getby_index(uint64_t id, ipnet_stack_t *ips) 1646 { 1647 ipnetif_t *ipnetif; 1648 1649 mutex_enter(&ips->ips_avl_lock); 1650 if ((ipnetif = avl_find(&ips->ips_avl_by_index, &id, NULL)) != NULL) 1651 ipnetif_refhold(ipnetif); 1652 mutex_exit(&ips->ips_avl_lock); 1653 return (ipnetif); 1654 } 1655 1656 static ipnetif_t * 1657 ipnetif_getby_dev(dev_t dev, ipnet_stack_t *ips) 1658 { 1659 ipnetif_t *ipnetif; 1660 avl_tree_t *tree; 1661 1662 mutex_enter(&ips->ips_avl_lock); 1663 tree = &ips->ips_avl_by_index; 1664 for (ipnetif = avl_first(tree); ipnetif != NULL; 1665 ipnetif = avl_walk(tree, ipnetif, AVL_AFTER)) { 1666 if (ipnetif->if_dev == dev) { 1667 ipnetif_refhold(ipnetif); 1668 break; 1669 } 1670 } 1671 mutex_exit(&ips->ips_avl_lock); 1672 return (ipnetif); 1673 } 1674 1675 static ipnetif_addr_t * 1676 ipnet_match_lif(ipnetif_t *ipnetif, lif_if_t lid, boolean_t isv6) 1677 { 1678 ipnetif_addr_t *ifaddr; 1679 list_t *list; 1680 1681 mutex_enter(&ipnetif->if_addr_lock); 1682 list = isv6 ? 
&ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list; 1683 for (ifaddr = list_head(list); ifaddr != NULL; 1684 ifaddr = list_next(list, ifaddr)) { 1685 if (lid == ifaddr->ifa_id) 1686 break; 1687 } 1688 mutex_exit(&ipnetif->if_addr_lock); 1689 return (ifaddr); 1690 } 1691 1692 /* ARGSUSED */ 1693 static void * 1694 ipnet_stack_init(netstackid_t stackid, netstack_t *ns) 1695 { 1696 ipnet_stack_t *ips; 1697 1698 ips = kmem_zalloc(sizeof (*ips), KM_SLEEP); 1699 ips->ips_netstack = ns; 1700 mutex_init(&ips->ips_avl_lock, NULL, MUTEX_DEFAULT, 0); 1701 avl_create(&ips->ips_avl_by_index, ipnetif_compare_index, 1702 sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_index)); 1703 avl_create(&ips->ips_avl_by_name, ipnetif_compare_name, 1704 sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_name)); 1705 avl_create(&ips->ips_avl_by_shared, ipnetif_compare_name_zone, 1706 sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_shared)); 1707 mutex_init(&ips->ips_walkers_lock, NULL, MUTEX_DEFAULT, NULL); 1708 cv_init(&ips->ips_walkers_cv, NULL, CV_DRIVER, NULL); 1709 list_create(&ips->ips_str_list, sizeof (ipnet_t), 1710 offsetof(ipnet_t, ipnet_next)); 1711 ipnet_register_netihook(ips); 1712 return (ips); 1713 } 1714 1715 /* ARGSUSED */ 1716 static void 1717 ipnet_stack_fini(netstackid_t stackid, void *arg) 1718 { 1719 ipnet_stack_t *ips = arg; 1720 ipnetif_t *ipnetif, *nipnetif; 1721 1722 if (ips->ips_kstatp != NULL) { 1723 zoneid_t zoneid; 1724 1725 zoneid = netstackid_to_zoneid(stackid); 1726 net_kstat_delete(net_zoneidtonetid(zoneid), ips->ips_kstatp); 1727 } 1728 if (ips->ips_ndv4 != NULL) { 1729 VERIFY(net_hook_unregister(ips->ips_ndv4, NH_NIC_EVENTS, 1730 ips->ips_nicevents) == 0); 1731 VERIFY(net_protocol_release(ips->ips_ndv4) == 0); 1732 } 1733 if (ips->ips_ndv6 != NULL) { 1734 VERIFY(net_hook_unregister(ips->ips_ndv6, NH_NIC_EVENTS, 1735 ips->ips_nicevents) == 0); 1736 VERIFY(net_protocol_release(ips->ips_ndv6) == 0); 1737 } 1738 hook_free(ips->ips_nicevents); 1739 1740 
for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL; 1741 ipnetif = nipnetif) { 1742 nipnetif = AVL_NEXT(&ips->ips_avl_by_index, ipnetif); 1743 ipnetif_remove(ipnetif, ips); 1744 } 1745 avl_destroy(&ips->ips_avl_by_shared); 1746 avl_destroy(&ips->ips_avl_by_index); 1747 avl_destroy(&ips->ips_avl_by_name); 1748 mutex_destroy(&ips->ips_avl_lock); 1749 mutex_destroy(&ips->ips_walkers_lock); 1750 cv_destroy(&ips->ips_walkers_cv); 1751 list_destroy(&ips->ips_str_list); 1752 kmem_free(ips, sizeof (*ips)); 1753 } 1754 1755 /* Do any of the addresses in addrlist belong the supplied zoneid? */ 1756 static boolean_t 1757 ipnet_addrs_in_zone(list_t *addrlist, zoneid_t zoneid) 1758 { 1759 ipnetif_addr_t *ifa; 1760 1761 for (ifa = list_head(addrlist); ifa != NULL; 1762 ifa = list_next(addrlist, ifa)) { 1763 if (ifa->ifa_zone == zoneid) 1764 return (B_TRUE); 1765 } 1766 return (B_FALSE); 1767 } 1768 1769 /* Should the supplied ipnetif be visible from the supplied zoneid? */ 1770 static boolean_t 1771 ipnetif_in_zone(ipnetif_t *ipnetif, zoneid_t zoneid, ipnet_stack_t *ips) 1772 { 1773 int ret; 1774 1775 /* 1776 * The global zone has visibility into all interfaces in the global 1777 * stack, and exclusive stack zones have visibility into all 1778 * interfaces in their stack. 1779 */ 1780 if (zoneid == GLOBAL_ZONEID || 1781 ips->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID) 1782 return (B_TRUE); 1783 1784 /* 1785 * Shared-stack zones only have visibility for interfaces that have 1786 * addresses in their zone. 1787 */ 1788 mutex_enter(&ipnetif->if_addr_lock); 1789 ret = ipnet_addrs_in_zone(&ipnetif->if_ip4addr_list, zoneid) || 1790 ipnet_addrs_in_zone(&ipnetif->if_ip6addr_list, zoneid); 1791 mutex_exit(&ipnetif->if_addr_lock); 1792 return (ret); 1793 } 1794 1795 /* 1796 * Verify that any ipnet_t that has a reference to the supplied ipnetif should 1797 * still be allowed to have it open. 
A given ipnet_t may no longer be allowed 1798 * to have an ipnetif open if there are no longer any addresses that belong to 1799 * the ipnetif in the ipnet_t's non-global shared-stack zoneid. If that's the 1800 * case, send the ipnet_t an M_HANGUP. 1801 */ 1802 static void 1803 ipnetif_zonecheck(ipnetif_t *ipnetif, ipnet_stack_t *ips) 1804 { 1805 list_t *strlist = &ips->ips_str_list; 1806 ipnet_t *ipnet; 1807 1808 ipnet_walkers_inc(ips); 1809 for (ipnet = list_head(strlist); ipnet != NULL; 1810 ipnet = list_next(strlist, ipnet)) { 1811 if (ipnet->ipnet_if != ipnetif) 1812 continue; 1813 if (!ipnetif_in_zone(ipnetif, ipnet->ipnet_zoneid, ips)) 1814 (void) putnextctl(ipnet->ipnet_rq, M_HANGUP); 1815 } 1816 ipnet_walkers_dec(ips); 1817 } 1818 1819 void 1820 ipnet_walk_if(ipnet_walkfunc_t *cb, void *arg, zoneid_t zoneid) 1821 { 1822 ipnetif_t *ipnetif; 1823 list_t cbdata; 1824 ipnetif_cbdata_t *cbnode; 1825 netstack_t *ns; 1826 ipnet_stack_t *ips; 1827 1828 /* 1829 * On labeled systems, non-global zones shouldn't see anything 1830 * in /dev/ipnet. 
1831 */ 1832 if (is_system_labeled() && zoneid != GLOBAL_ZONEID) 1833 return; 1834 1835 if ((ns = netstack_find_by_zoneid(zoneid)) == NULL) 1836 return; 1837 1838 ips = ns->netstack_ipnet; 1839 list_create(&cbdata, sizeof (ipnetif_cbdata_t), 1840 offsetof(ipnetif_cbdata_t, ic_next)); 1841 1842 mutex_enter(&ips->ips_avl_lock); 1843 for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL; 1844 ipnetif = avl_walk(&ips->ips_avl_by_index, ipnetif, AVL_AFTER)) { 1845 if (!ipnetif_in_zone(ipnetif, zoneid, ips)) 1846 continue; 1847 cbnode = kmem_zalloc(sizeof (ipnetif_cbdata_t), KM_SLEEP); 1848 (void) strlcpy(cbnode->ic_ifname, ipnetif->if_name, LIFNAMSIZ); 1849 cbnode->ic_dev = ipnetif->if_dev; 1850 list_insert_head(&cbdata, cbnode); 1851 } 1852 mutex_exit(&ips->ips_avl_lock); 1853 1854 while ((cbnode = list_head(&cbdata)) != NULL) { 1855 cb(cbnode->ic_ifname, arg, cbnode->ic_dev); 1856 list_remove(&cbdata, cbnode); 1857 kmem_free(cbnode, sizeof (ipnetif_cbdata_t)); 1858 } 1859 list_destroy(&cbdata); 1860 netstack_rele(ns); 1861 } 1862 1863 static int 1864 ipnetif_compare_index(const void *index_ptr, const void *ipnetifp) 1865 { 1866 int64_t index1 = *((int64_t *)index_ptr); 1867 int64_t index2 = (int64_t)((ipnetif_t *)ipnetifp)->if_index; 1868 1869 return (SIGNOF(index2 - index1)); 1870 } 1871 1872 static int 1873 ipnetif_compare_name(const void *name_ptr, const void *ipnetifp) 1874 { 1875 int res; 1876 1877 res = strcmp(((ipnetif_t *)ipnetifp)->if_name, name_ptr); 1878 return (SIGNOF(res)); 1879 } 1880 1881 static int 1882 ipnetif_compare_name_zone(const void *key_ptr, const void *ipnetifp) 1883 { 1884 const uintptr_t *ptr = key_ptr; 1885 const ipnetif_t *ifp; 1886 int res; 1887 1888 ifp = ipnetifp; 1889 res = ifp->if_zoneid - ptr[0]; 1890 if (res != 0) 1891 return (SIGNOF(res)); 1892 res = strcmp(ifp->if_name, (char *)ptr[1]); 1893 return (SIGNOF(res)); 1894 } 1895 1896 static void 1897 ipnetif_refhold(ipnetif_t *ipnetif) 1898 { 1899 
mutex_enter(&ipnetif->if_reflock); 1900 ipnetif->if_refcnt++; 1901 mutex_exit(&ipnetif->if_reflock); 1902 } 1903 1904 static void 1905 ipnetif_refrele(ipnetif_t *ipnetif) 1906 { 1907 mutex_enter(&ipnetif->if_reflock); 1908 ASSERT(ipnetif->if_refcnt > 0); 1909 if (--ipnetif->if_refcnt == 0) 1910 ipnetif_free(ipnetif); 1911 else 1912 mutex_exit(&ipnetif->if_reflock); 1913 } 1914 1915 static void 1916 ipnet_walkers_inc(ipnet_stack_t *ips) 1917 { 1918 mutex_enter(&ips->ips_walkers_lock); 1919 ips->ips_walkers_cnt++; 1920 mutex_exit(&ips->ips_walkers_lock); 1921 } 1922 1923 static void 1924 ipnet_walkers_dec(ipnet_stack_t *ips) 1925 { 1926 mutex_enter(&ips->ips_walkers_lock); 1927 ASSERT(ips->ips_walkers_cnt != 0); 1928 if (--ips->ips_walkers_cnt == 0) 1929 cv_broadcast(&ips->ips_walkers_cv); 1930 mutex_exit(&ips->ips_walkers_lock); 1931 } 1932 1933 /*ARGSUSED*/ 1934 static int 1935 ipobs_bounce_func(hook_event_token_t token, hook_data_t info, void *arg) 1936 { 1937 hook_pkt_observe_t *hdr; 1938 pfv_t func = (pfv_t)arg; 1939 mblk_t *mp; 1940 1941 hdr = (hook_pkt_observe_t *)info; 1942 /* 1943 * Code in ip_input() expects that it is the only one accessing the 1944 * packet. 1945 */ 1946 mp = copymsg(hdr->hpo_pkt); 1947 if (mp == NULL) { 1948 netstack_t *ns = hdr->hpo_ctx; 1949 ipnet_stack_t *ips = ns->netstack_ipnet; 1950 1951 IPSK_BUMP(ips, ik_dispatchDupDrop); 1952 return (0); 1953 } 1954 1955 hdr = (hook_pkt_observe_t *)mp->b_rptr; 1956 hdr->hpo_pkt = mp; 1957 1958 func(mp); 1959 1960 return (0); 1961 } 1962 1963 hook_t * 1964 ipobs_register_hook(netstack_t *ns, pfv_t func) 1965 { 1966 ip_stack_t *ipst = ns->netstack_ip; 1967 char name[32]; 1968 hook_t *hook; 1969 1970 HOOK_INIT(hook, ipobs_bounce_func, "", (void *)func); 1971 VERIFY(hook != NULL); 1972 1973 /* 1974 * To register multiple hooks with he same callback function, 1975 * a unique name is needed. 
1976 */ 1977 (void) snprintf(name, sizeof (name), "ipobserve_%p", (void *)hook); 1978 hook->h_name = strdup(name); 1979 1980 (void) net_hook_register(ipst->ips_ip4_observe_pr, NH_OBSERVE, hook); 1981 (void) net_hook_register(ipst->ips_ip6_observe_pr, NH_OBSERVE, hook); 1982 1983 return (hook); 1984 } 1985 1986 void 1987 ipobs_unregister_hook(netstack_t *ns, hook_t *hook) 1988 { 1989 ip_stack_t *ipst = ns->netstack_ip; 1990 1991 (void) net_hook_unregister(ipst->ips_ip4_observe_pr, NH_OBSERVE, hook); 1992 1993 (void) net_hook_unregister(ipst->ips_ip6_observe_pr, NH_OBSERVE, hook); 1994 1995 strfree(hook->h_name); 1996 1997 hook_free(hook); 1998 } 1999 2000 /* ******************************************************************** */ 2001 /* BPF Functions below */ 2002 /* ******************************************************************** */ 2003 2004 /* 2005 * Convenience function to make mapping a zoneid to an ipnet_stack_t easy. 2006 */ 2007 ipnet_stack_t * 2008 ipnet_find_by_zoneid(zoneid_t zoneid) 2009 { 2010 netstack_t *ns; 2011 2012 VERIFY((ns = netstack_find_by_zoneid(zoneid)) != NULL); 2013 return (ns->netstack_ipnet); 2014 } 2015 2016 /* 2017 * Functions, such as the above ipnet_find_by_zoneid(), will return a 2018 * pointer to ipnet_stack_t by calling a netstack lookup function. 2019 * The netstack_find_*() functions return a pointer after doing a "hold" 2020 * on the data structure and thereby require a "release" when the caller 2021 * is finished with it. We need to mirror that API here and thus a caller 2022 * of ipnet_find_by_zoneid() is required to call ipnet_rele(). 
2023 */ 2024 void 2025 ipnet_rele(ipnet_stack_t *ips) 2026 { 2027 netstack_rele(ips->ips_netstack); 2028 } 2029 2030 /* 2031 */ 2032 void 2033 ipnet_set_itap(bpf_itap_fn_t tapfunc) 2034 { 2035 ipnet_itap = tapfunc; 2036 } 2037 2038 /* 2039 * The list of interfaces available via ipnet is private for each zone, 2040 * so the AVL tree of each zone must be searched for a given name, even 2041 * if all names are unique. 2042 */ 2043 int 2044 ipnet_open_byname(const char *name, ipnetif_t **ptr, zoneid_t zoneid) 2045 { 2046 ipnet_stack_t *ips; 2047 ipnetif_t *ipnetif; 2048 2049 ASSERT(ptr != NULL); 2050 VERIFY((ips = ipnet_find_by_zoneid(zoneid)) != NULL); 2051 2052 mutex_enter(&ips->ips_avl_lock); 2053 2054 /* 2055 * Shared instance zone? 2056 */ 2057 if (netstackid_to_zoneid(zoneid_to_netstackid(zoneid)) != zoneid) { 2058 uintptr_t key[2] = { zoneid, (uintptr_t)name }; 2059 2060 ipnetif = avl_find(&ips->ips_avl_by_shared, (void *)key, NULL); 2061 } else { 2062 ipnetif = avl_find(&ips->ips_avl_by_name, (void *)name, NULL); 2063 } 2064 if (ipnetif != NULL) 2065 ipnetif_refhold(ipnetif); 2066 mutex_exit(&ips->ips_avl_lock); 2067 2068 *ptr = ipnetif; 2069 ipnet_rele(ips); 2070 2071 if (ipnetif == NULL) 2072 return (ESRCH); 2073 return (0); 2074 } 2075 2076 void 2077 ipnet_close_byhandle(ipnetif_t *ifp) 2078 { 2079 ASSERT(ifp != NULL); 2080 ipnetif_refrele(ifp); 2081 } 2082 2083 const char * 2084 ipnet_name(ipnetif_t *ifp) 2085 { 2086 ASSERT(ifp != NULL); 2087 return (ifp->if_name); 2088 } 2089 2090 /* 2091 * To find the linkid for a given name, it is necessary to know which zone 2092 * the interface name belongs to and to search the avl tree for that zone 2093 * as there is no master list of all interfaces and which zone they belong 2094 * to. It is assumed that the caller of this function is somehow already 2095 * working with the ipnet interfaces and hence the ips_event_lock is held. 
2096 * When BPF calls into this function, it is doing so because of an event 2097 * in ipnet, and thus ipnet holds the ips_event_lock. Thus the datalink id 2098 * value returned has meaning without the need for grabbing a hold on the 2099 * owning structure. 2100 */ 2101 int 2102 ipnet_get_linkid_byname(const char *name, uint_t *idp, zoneid_t zoneid) 2103 { 2104 ipnet_stack_t *ips; 2105 ipnetif_t *ifp; 2106 2107 VERIFY((ips = ipnet_find_by_zoneid(zoneid)) != NULL); 2108 ASSERT(mutex_owned(&ips->ips_event_lock)); 2109 2110 mutex_enter(&ips->ips_avl_lock); 2111 ifp = avl_find(&ips->ips_avl_by_name, (void *)name, NULL); 2112 if (ifp != NULL) 2113 *idp = (uint_t)ifp->if_index; 2114 2115 /* 2116 * Shared instance zone? 2117 */ 2118 if (netstackid_to_zoneid(zoneid_to_netstackid(zoneid)) != zoneid) { 2119 uintptr_t key[2] = { zoneid, (uintptr_t)name }; 2120 2121 ifp = avl_find(&ips->ips_avl_by_shared, (void *)key, NULL); 2122 if (ifp != NULL) 2123 *idp = (uint_t)ifp->if_index; 2124 } 2125 2126 mutex_exit(&ips->ips_avl_lock); 2127 ipnet_rele(ips); 2128 2129 if (ifp == NULL) 2130 return (ESRCH); 2131 return (0); 2132 } 2133 2134 /* 2135 * Strictly speaking, there is no such thing as a "client" in ipnet, like 2136 * there is in mac. BPF only needs to have this because it is required as 2137 * part of interfacing correctly with mac. The reuse of the original 2138 * ipnetif_t as a client poses no danger, so long as it is done with its 2139 * own ref-count'd hold that is given up on close. 2140 */ 2141 int 2142 ipnet_client_open(ipnetif_t *ptr, ipnetif_t **result) 2143 { 2144 ASSERT(ptr != NULL); 2145 ASSERT(result != NULL); 2146 ipnetif_refhold(ptr); 2147 *result = ptr; 2148 2149 return (0); 2150 } 2151 2152 void 2153 ipnet_client_close(ipnetif_t *ptr) 2154 { 2155 ASSERT(ptr != NULL); 2156 ipnetif_refrele(ptr); 2157 } 2158 2159 /* 2160 * This is called from BPF when it needs to start receiving packets 2161 * from ipnet. 
2162 * 2163 * The use of the ipnet_t structure here is somewhat lightweight when 2164 * compared to how it is used elsewhere but it already has all of the 2165 * right fields in it, so reuse here doesn't seem out of order. Its 2166 * primary purpose here is to provide the means to store pointers for 2167 * use when ipnet_promisc_remove() needs to be called. 2168 * 2169 * This should never be called for the IPNET_MINOR_LO device as it is 2170 * never created via ipnetif_create. 2171 */ 2172 /*ARGSUSED*/ 2173 int 2174 ipnet_promisc_add(void *handle, uint_t how, void *data, uintptr_t *mhandle, 2175 int flags) 2176 { 2177 ip_stack_t *ipst; 2178 netstack_t *ns; 2179 ipnetif_t *ifp; 2180 ipnet_t *ipnet; 2181 char name[32]; 2182 int error; 2183 2184 ifp = (ipnetif_t *)handle; 2185 ns = netstack_find_by_zoneid(ifp->if_zoneid); 2186 2187 if ((how == DL_PROMISC_PHYS) || (how == DL_PROMISC_MULTI)) { 2188 error = ipnet_join_allmulti(ifp, ns->netstack_ipnet); 2189 if (error != 0) 2190 return (error); 2191 } else { 2192 return (EINVAL); 2193 } 2194 2195 ipnet = kmem_zalloc(sizeof (*ipnet), KM_SLEEP); 2196 ipnet->ipnet_if = ifp; 2197 ipnet->ipnet_ns = ns; 2198 ipnet->ipnet_flags = flags; 2199 2200 if ((ifp->if_flags & IPNETIF_LOOPBACK) != 0) { 2201 ipnet->ipnet_acceptfn = ipnet_loaccept; 2202 } else { 2203 ipnet->ipnet_acceptfn = ipnet_accept; 2204 } 2205 2206 /* 2207 * To register multiple hooks with the same callback function, 2208 * a unique name is needed. 
2209 */ 2210 HOOK_INIT(ipnet->ipnet_hook, ipnet_bpf_bounce, "", ipnet); 2211 (void) snprintf(name, sizeof (name), "ipnet_promisc_%p", 2212 (void *)ipnet->ipnet_hook); 2213 ipnet->ipnet_hook->h_name = strdup(name); 2214 ipnet->ipnet_data = data; 2215 ipnet->ipnet_zoneid = ifp->if_zoneid; 2216 2217 ipst = ns->netstack_ip; 2218 2219 error = net_hook_register(ipst->ips_ip4_observe_pr, NH_OBSERVE, 2220 ipnet->ipnet_hook); 2221 if (error != 0) 2222 goto regfail; 2223 2224 error = net_hook_register(ipst->ips_ip6_observe_pr, NH_OBSERVE, 2225 ipnet->ipnet_hook); 2226 if (error != 0) { 2227 (void) net_hook_unregister(ipst->ips_ip4_observe_pr, 2228 NH_OBSERVE, ipnet->ipnet_hook); 2229 goto regfail; 2230 } 2231 2232 *mhandle = (uintptr_t)ipnet; 2233 2234 return (0); 2235 2236 regfail: 2237 cmn_err(CE_WARN, "net_hook_register failed: %d", error); 2238 strfree(ipnet->ipnet_hook->h_name); 2239 hook_free(ipnet->ipnet_hook); 2240 ipnet_leave_allmulti(ifp, ns->netstack_ipnet); 2241 netstack_rele(ns); 2242 return (error); 2243 } 2244 2245 void 2246 ipnet_promisc_remove(void *data) 2247 { 2248 ip_stack_t *ipst; 2249 ipnet_t *ipnet; 2250 hook_t *hook; 2251 2252 ipnet = data; 2253 ipst = ipnet->ipnet_ns->netstack_ip; 2254 hook = ipnet->ipnet_hook; 2255 2256 VERIFY(net_hook_unregister(ipst->ips_ip4_observe_pr, NH_OBSERVE, 2257 hook) == 0); 2258 2259 VERIFY(net_hook_unregister(ipst->ips_ip6_observe_pr, NH_OBSERVE, 2260 hook) == 0); 2261 2262 strfree(hook->h_name); 2263 2264 hook_free(hook); 2265 2266 ipnet_leave_allmulti(ipnet->ipnet_if, ipnet->ipnet_ns->netstack_ipnet); 2267 2268 netstack_rele(ipnet->ipnet_ns); 2269 2270 kmem_free(ipnet, sizeof (*ipnet)); 2271 } 2272 2273 /* 2274 * arg here comes from the ipnet_t allocated in ipnet_promisc_add. 2275 * An important field from that structure is "ipnet_data" that 2276 * contains the "data" pointer passed into ipnet_promisc_add: it needs 2277 * to be passed back to bpf when we call into ipnet_itap. 
2278 * 2279 * ipnet_itap is set by ipnet_set_bpfattach, which in turn is called 2280 * from BPF. 2281 */ 2282 /*ARGSUSED*/ 2283 static int 2284 ipnet_bpf_bounce(hook_event_token_t token, hook_data_t info, void *arg) 2285 { 2286 hook_pkt_observe_t *hdr; 2287 ipnet_addrp_t src; 2288 ipnet_addrp_t dst; 2289 ipnet_stack_t *ips; 2290 ipnet_t *ipnet; 2291 mblk_t *netmp; 2292 mblk_t *mp; 2293 2294 hdr = (hook_pkt_observe_t *)info; 2295 mp = hdr->hpo_pkt; 2296 ipnet = (ipnet_t *)arg; 2297 ips = ((netstack_t *)hdr->hpo_ctx)->netstack_ipnet; 2298 2299 netmp = hdr->hpo_pkt->b_cont; 2300 src.iap_family = hdr->hpo_family; 2301 dst.iap_family = hdr->hpo_family; 2302 2303 if (hdr->hpo_family == AF_INET) { 2304 src.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_src; 2305 dst.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_dst; 2306 } else { 2307 src.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_src; 2308 dst.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_dst; 2309 } 2310 2311 if (!(*ipnet->ipnet_acceptfn)(ipnet, hdr, &src, &dst)) { 2312 IPSK_BUMP(ips, ik_acceptFail); 2313 return (0); 2314 } 2315 IPSK_BUMP(ips, ik_acceptOk); 2316 2317 ipnet_itap(ipnet->ipnet_data, mp, 2318 hdr->hpo_htype == htons(IPOBS_HOOK_OUTBOUND), 2319 ntohl(hdr->hpo_pktlen) + MBLKL(mp)); 2320 2321 return (0); 2322 } 2323 2324 /* 2325 * clone'd ipnetif_t's are created when a shared IP instance zone comes 2326 * to life and configures an IP address. The model that BPF uses is that 2327 * each interface must have a unique pointer and each interface must be 2328 * representative of what it can capture. They are limited to one DLT 2329 * per interface and one zone per interface. Thus every interface that 2330 * can be seen in a zone must be announced via an attach to bpf. For 2331 * shared instance zones, this means the ipnet driver needs to detect 2332 * when an address is added to an interface in a zone for the first 2333 * time (and also when the last address is removed.) 
2334 */ 2335 static ipnetif_t * 2336 ipnetif_clone_create(ipnetif_t *ifp, zoneid_t zoneid) 2337 { 2338 uintptr_t key[2] = { zoneid, (uintptr_t)ifp->if_name }; 2339 ipnet_stack_t *ips = ifp->if_stackp; 2340 avl_index_t where = 0; 2341 ipnetif_t *newif; 2342 2343 mutex_enter(&ips->ips_avl_lock); 2344 newif = avl_find(&ips->ips_avl_by_shared, (void *)key, &where); 2345 if (newif != NULL) { 2346 ipnetif_refhold(newif); 2347 newif->if_sharecnt++; 2348 mutex_exit(&ips->ips_avl_lock); 2349 return (newif); 2350 } 2351 2352 newif = ipnet_alloc_if(ips); 2353 if (newif == NULL) { 2354 mutex_exit(&ips->ips_avl_lock); 2355 return (NULL); 2356 } 2357 2358 newif->if_refcnt = 1; 2359 newif->if_sharecnt = 1; 2360 newif->if_zoneid = zoneid; 2361 (void) strlcpy(newif->if_name, ifp->if_name, LIFNAMSIZ); 2362 newif->if_flags = ifp->if_flags & IPNETIF_LOOPBACK; 2363 newif->if_index = ifp->if_index; 2364 2365 avl_insert(&ips->ips_avl_by_shared, newif, where); 2366 mutex_exit(&ips->ips_avl_lock); 2367 2368 return (newif); 2369 } 2370 2371 static void 2372 ipnetif_clone_release(ipnetif_t *ipnetif) 2373 { 2374 boolean_t dofree = B_FALSE; 2375 boolean_t doremove = B_FALSE; 2376 ipnet_stack_t *ips = ipnetif->if_stackp; 2377 2378 mutex_enter(&ipnetif->if_reflock); 2379 ASSERT(ipnetif->if_refcnt > 0); 2380 if (--ipnetif->if_refcnt == 0) 2381 dofree = B_TRUE; 2382 ASSERT(ipnetif->if_sharecnt > 0); 2383 if (--ipnetif->if_sharecnt == 0) 2384 doremove = B_TRUE; 2385 mutex_exit(&ipnetif->if_reflock); 2386 if (doremove) { 2387 mutex_enter(&ips->ips_avl_lock); 2388 avl_remove(&ips->ips_avl_by_shared, ipnetif); 2389 mutex_exit(&ips->ips_avl_lock); 2390 } 2391 if (dofree) { 2392 ASSERT(ipnetif->if_sharecnt == 0); 2393 ipnetif_free(ipnetif); 2394 } 2395 } 2396