/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * The ipnet device defined here provides access to packets at the IP layer. To
 * provide access to packets at this layer it registers a callback function in
 * the ip module and when there are open instances of the device ip will pass
 * packets into the device. Packets from ip are passed on the input, output and
 * loopback paths. Internally the module returns to ip as soon as possible by
 * deferring processing using a taskq.
 *
 * Management of the devices in /dev/ipnet/ is handled by the devname
 * filesystem and use of the neti interfaces. This module registers for NIC
 * events using the neti framework so that when IP interfaces are brought up,
 * taken down etc. the ipnet module is notified and its view of the interfaces
 * configured on the system adjusted. On attach, the module gets an initial
 * view of the system again using the neti framework but as it has already
 * registered for IP interface events, it is still up-to-date with any changes.
42 */ 43 44 #include <sys/types.h> 45 #include <sys/conf.h> 46 #include <sys/cred.h> 47 #include <sys/stat.h> 48 #include <sys/ddi.h> 49 #include <sys/sunddi.h> 50 #include <sys/modctl.h> 51 #include <sys/dlpi.h> 52 #include <sys/strsun.h> 53 #include <sys/id_space.h> 54 #include <sys/kmem.h> 55 #include <sys/mkdev.h> 56 #include <sys/neti.h> 57 #include <net/if.h> 58 #include <sys/errno.h> 59 #include <sys/list.h> 60 #include <sys/ksynch.h> 61 #include <sys/hook_event.h> 62 #include <sys/sdt.h> 63 #include <sys/stropts.h> 64 #include <sys/sysmacros.h> 65 #include <inet/ip.h> 66 #include <inet/ip_if.h> 67 #include <inet/ip_multi.h> 68 #include <inet/ip6.h> 69 #include <inet/ipnet.h> 70 #include <net/bpf.h> 71 #include <net/bpfdesc.h> 72 #include <net/dlt.h> 73 74 static struct module_info ipnet_minfo = { 75 1, /* mi_idnum */ 76 "ipnet", /* mi_idname */ 77 0, /* mi_minpsz */ 78 INFPSZ, /* mi_maxpsz */ 79 2048, /* mi_hiwat */ 80 0 /* mi_lowat */ 81 }; 82 83 /* 84 * List to hold static view of ipnetif_t's on the system. This is needed to 85 * avoid holding the lock protecting the avl tree of ipnetif's over the 86 * callback into the dev filesystem. 87 */ 88 typedef struct ipnetif_cbdata { 89 char ic_ifname[LIFNAMSIZ]; 90 dev_t ic_dev; 91 list_node_t ic_next; 92 } ipnetif_cbdata_t; 93 94 /* 95 * Convenience enumerated type for ipnet_accept(). It describes the 96 * properties of a given ipnet_addrp_t relative to a single ipnet_t 97 * client stream. The values represent whether the address is ... 98 */ 99 typedef enum { 100 IPNETADDR_MYADDR, /* an address on my ipnetif_t. */ 101 IPNETADDR_MBCAST, /* a multicast or broadcast address. */ 102 IPNETADDR_UNKNOWN /* none of the above. */ 103 } ipnet_addrtype_t; 104 105 /* Argument used for the ipnet_nicevent_taskq callback. 
*/ 106 typedef struct ipnet_nicevent_s { 107 nic_event_t ipne_event; 108 net_handle_t ipne_protocol; 109 netstackid_t ipne_stackid; 110 uint64_t ipne_ifindex; 111 uint64_t ipne_lifindex; 112 char ipne_ifname[LIFNAMSIZ]; 113 } ipnet_nicevent_t; 114 115 static dev_info_t *ipnet_dip; 116 static major_t ipnet_major; 117 static ddi_taskq_t *ipnet_taskq; /* taskq for packets */ 118 static ddi_taskq_t *ipnet_nicevent_taskq; /* taskq for NIC events */ 119 static id_space_t *ipnet_minor_space; 120 static const int IPNET_MINOR_LO = 1; /* minor number for /dev/lo0 */ 121 static const int IPNET_MINOR_MIN = 2; /* start of dynamic minors */ 122 static dl_info_ack_t ipnet_infoack = IPNET_INFO_ACK_INIT; 123 static ipnet_acceptfn_t ipnet_accept, ipnet_loaccept; 124 static bpf_itap_fn_t ipnet_itap; 125 126 static void ipnet_input(mblk_t *); 127 static int ipnet_wput(queue_t *, mblk_t *); 128 static int ipnet_rsrv(queue_t *); 129 static int ipnet_open(queue_t *, dev_t *, int, int, cred_t *); 130 static int ipnet_close(queue_t *); 131 static void ipnet_ioctl(queue_t *, mblk_t *); 132 static void ipnet_iocdata(queue_t *, mblk_t *); 133 static void ipnet_wputnondata(queue_t *, mblk_t *); 134 static int ipnet_attach(dev_info_t *, ddi_attach_cmd_t); 135 static int ipnet_detach(dev_info_t *, ddi_detach_cmd_t); 136 static int ipnet_devinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 137 static void ipnet_inforeq(queue_t *q, mblk_t *mp); 138 static void ipnet_bindreq(queue_t *q, mblk_t *mp); 139 static void ipnet_unbindreq(queue_t *q, mblk_t *mp); 140 static void ipnet_dlpromisconreq(queue_t *q, mblk_t *mp); 141 static void ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp); 142 static int ipnet_join_allmulti(ipnetif_t *, ipnet_stack_t *); 143 static void ipnet_leave_allmulti(ipnetif_t *, ipnet_stack_t *); 144 static int ipnet_nicevent_cb(hook_event_token_t, hook_data_t, void *); 145 static void ipnet_nicevent_task(void *); 146 static ipnetif_t *ipnetif_create(const char *, uint64_t, 
ipnet_stack_t *, 147 uint64_t); 148 static void ipnetif_remove(ipnetif_t *, ipnet_stack_t *); 149 static ipnetif_addr_t *ipnet_match_lif(ipnetif_t *, lif_if_t, boolean_t); 150 static ipnetif_t *ipnetif_getby_index(uint64_t, ipnet_stack_t *); 151 static ipnetif_t *ipnetif_getby_dev(dev_t, ipnet_stack_t *); 152 static boolean_t ipnetif_in_zone(ipnetif_t *, zoneid_t, ipnet_stack_t *); 153 static void ipnetif_zonecheck(ipnetif_t *, ipnet_stack_t *); 154 static int ipnet_populate_if(net_handle_t, ipnet_stack_t *, boolean_t); 155 static int ipnetif_compare_name(const void *, const void *); 156 static int ipnetif_compare_name_zone(const void *, const void *); 157 static int ipnetif_compare_index(const void *, const void *); 158 static void ipnet_add_ifaddr(uint64_t, ipnetif_t *, net_handle_t); 159 static void ipnet_delete_ifaddr(ipnetif_addr_t *, ipnetif_t *, boolean_t); 160 static void ipnetif_refhold(ipnetif_t *); 161 static void ipnetif_refrele(ipnetif_t *); 162 static void ipnet_walkers_inc(ipnet_stack_t *); 163 static void ipnet_walkers_dec(ipnet_stack_t *); 164 static void ipnet_register_netihook(ipnet_stack_t *); 165 static void *ipnet_stack_init(netstackid_t, netstack_t *); 166 static void ipnet_stack_fini(netstackid_t, void *); 167 static void ipnet_dispatch(void *); 168 static int ipobs_bounce_func(hook_event_token_t, hook_data_t, void *); 169 static int ipnet_bpf_bounce(hook_event_token_t, hook_data_t, void *); 170 static ipnetif_t *ipnetif_clone_create(ipnetif_t *, zoneid_t); 171 static void ipnetif_clone_release(ipnetif_t *); 172 173 static struct qinit ipnet_rinit = { 174 NULL, /* qi_putp */ 175 ipnet_rsrv, /* qi_srvp */ 176 ipnet_open, /* qi_qopen */ 177 ipnet_close, /* qi_qclose */ 178 NULL, /* qi_qadmin */ 179 &ipnet_minfo, /* qi_minfo */ 180 }; 181 182 static struct qinit ipnet_winit = { 183 ipnet_wput, /* qi_putp */ 184 NULL, /* qi_srvp */ 185 NULL, /* qi_qopen */ 186 NULL, /* qi_qclose */ 187 NULL, /* qi_qadmin */ 188 &ipnet_minfo, /* qi_minfo */ 189 
}; 190 191 static struct streamtab ipnet_info = { 192 &ipnet_rinit, &ipnet_winit 193 }; 194 195 DDI_DEFINE_STREAM_OPS(ipnet_ops, nulldev, nulldev, ipnet_attach, 196 ipnet_detach, nodev, ipnet_devinfo, D_MP | D_MTPERMOD, &ipnet_info, 197 ddi_quiesce_not_supported); 198 199 static struct modldrv modldrv = { 200 &mod_driverops, 201 "STREAMS ipnet driver", 202 &ipnet_ops 203 }; 204 205 static struct modlinkage modlinkage = { 206 MODREV_1, &modldrv, NULL 207 }; 208 209 /* 210 * This structure contains the template data (names and type) that is 211 * copied, in bulk, into the new kstats structure created by net_kstat_create. 212 * No actual statistical information is stored in this instance of the 213 * ipnet_kstats_t structure. 214 */ 215 static ipnet_kstats_t stats_template = { 216 { "duplicationFail", KSTAT_DATA_UINT64 }, 217 { "dispatchOk", KSTAT_DATA_UINT64 }, 218 { "dispatchFail", KSTAT_DATA_UINT64 }, 219 { "dispatchHeaderDrop", KSTAT_DATA_UINT64 }, 220 { "dispatchDupDrop", KSTAT_DATA_UINT64 }, 221 { "dispatchDeliver", KSTAT_DATA_UINT64 }, 222 { "acceptOk", KSTAT_DATA_UINT64 }, 223 { "acceptFail", KSTAT_DATA_UINT64 } 224 }; 225 226 /* 227 * Walk the list of physical interfaces on the machine, for each 228 * interface create a new ipnetif_t and add any addresses to it. We 229 * need to do the walk twice, once for IPv4 and once for IPv6. 230 * 231 * The interfaces are destroyed as part of ipnet_stack_fini() for each 232 * stack. Note that we cannot do this initialization in 233 * ipnet_stack_init(), since ipnet_stack_init() cannot fail. 
234 */ 235 static int 236 ipnetif_init(void) 237 { 238 netstack_handle_t nh; 239 netstack_t *ns; 240 ipnet_stack_t *ips; 241 int ret = 0; 242 243 netstack_next_init(&nh); 244 while ((ns = netstack_next(&nh)) != NULL) { 245 ips = ns->netstack_ipnet; 246 if ((ret = ipnet_populate_if(ips->ips_ndv4, ips, B_FALSE)) == 0) 247 ret = ipnet_populate_if(ips->ips_ndv6, ips, B_TRUE); 248 netstack_rele(ns); 249 if (ret != 0) 250 break; 251 } 252 netstack_next_fini(&nh); 253 return (ret); 254 } 255 256 /* 257 * Standard module entry points. 258 */ 259 int 260 _init(void) 261 { 262 int ret; 263 boolean_t netstack_registered = B_FALSE; 264 265 if ((ipnet_major = ddi_name_to_major("ipnet")) == (major_t)-1) 266 return (ENODEV); 267 ipnet_minor_space = id_space_create("ipnet_minor_space", 268 IPNET_MINOR_MIN, MAXMIN32); 269 270 /* 271 * We call ddi_taskq_create() with nthread == 1 to ensure in-order 272 * delivery of packets to clients. Note that we need to create the 273 * taskqs before calling netstack_register() since ipnet_stack_init() 274 * registers callbacks that use 'em. 
275 */ 276 ipnet_taskq = ddi_taskq_create(NULL, "ipnet", 1, TASKQ_DEFAULTPRI, 0); 277 ipnet_nicevent_taskq = ddi_taskq_create(NULL, "ipnet_nic_event_queue", 278 1, TASKQ_DEFAULTPRI, 0); 279 if (ipnet_taskq == NULL || ipnet_nicevent_taskq == NULL) { 280 ret = ENOMEM; 281 goto done; 282 } 283 284 netstack_register(NS_IPNET, ipnet_stack_init, NULL, ipnet_stack_fini); 285 netstack_registered = B_TRUE; 286 287 if ((ret = ipnetif_init()) == 0) 288 ret = mod_install(&modlinkage); 289 done: 290 if (ret != 0) { 291 if (ipnet_taskq != NULL) 292 ddi_taskq_destroy(ipnet_taskq); 293 if (ipnet_nicevent_taskq != NULL) 294 ddi_taskq_destroy(ipnet_nicevent_taskq); 295 if (netstack_registered) 296 netstack_unregister(NS_IPNET); 297 id_space_destroy(ipnet_minor_space); 298 } 299 return (ret); 300 } 301 302 int 303 _fini(void) 304 { 305 int err; 306 307 if ((err = mod_remove(&modlinkage)) != 0) 308 return (err); 309 310 netstack_unregister(NS_IPNET); 311 ddi_taskq_destroy(ipnet_nicevent_taskq); 312 ddi_taskq_destroy(ipnet_taskq); 313 id_space_destroy(ipnet_minor_space); 314 return (0); 315 } 316 317 int 318 _info(struct modinfo *modinfop) 319 { 320 return (mod_info(&modlinkage, modinfop)); 321 } 322 323 static void 324 ipnet_register_netihook(ipnet_stack_t *ips) 325 { 326 int ret; 327 zoneid_t zoneid; 328 netid_t netid; 329 330 HOOK_INIT(ips->ips_nicevents, ipnet_nicevent_cb, "ipnet_nicevents", 331 ips); 332 333 /* 334 * It is possible for an exclusive stack to be in the process of 335 * shutting down here, and the netid and protocol lookups could fail 336 * in that case. 
337 */ 338 zoneid = netstackid_to_zoneid(ips->ips_netstack->netstack_stackid); 339 if ((netid = net_zoneidtonetid(zoneid)) == -1) 340 return; 341 342 if ((ips->ips_ndv4 = net_protocol_lookup(netid, NHF_INET)) != NULL) { 343 if ((ret = net_hook_register(ips->ips_ndv4, NH_NIC_EVENTS, 344 ips->ips_nicevents)) != 0) { 345 VERIFY(net_protocol_release(ips->ips_ndv4) == 0); 346 ips->ips_ndv4 = NULL; 347 cmn_err(CE_WARN, "unable to register IPv4 netinfo hooks" 348 " in zone %d: %d", zoneid, ret); 349 } 350 } 351 if ((ips->ips_ndv6 = net_protocol_lookup(netid, NHF_INET6)) != NULL) { 352 if ((ret = net_hook_register(ips->ips_ndv6, NH_NIC_EVENTS, 353 ips->ips_nicevents)) != 0) { 354 VERIFY(net_protocol_release(ips->ips_ndv6) == 0); 355 ips->ips_ndv6 = NULL; 356 cmn_err(CE_WARN, "unable to register IPv6 netinfo hooks" 357 " in zone %d: %d", zoneid, ret); 358 } 359 } 360 361 /* 362 * Create a local set of kstats for each zone. 363 */ 364 ips->ips_kstatp = net_kstat_create(netid, "ipnet", 0, "ipnet_stats", 365 "misc", KSTAT_TYPE_NAMED, 366 sizeof (ipnet_kstats_t) / sizeof (kstat_named_t), 0); 367 if (ips->ips_kstatp != NULL) { 368 bcopy(&stats_template, &ips->ips_stats, 369 sizeof (ips->ips_stats)); 370 ips->ips_kstatp->ks_data = &ips->ips_stats; 371 ips->ips_kstatp->ks_private = 372 (void *)(uintptr_t)ips->ips_netstack->netstack_stackid; 373 kstat_install(ips->ips_kstatp); 374 } else { 375 cmn_err(CE_WARN, "net_kstat_create(%s,%s,%s) failed", 376 "ipnet", "ipnet_stats", "misc"); 377 } 378 } 379 380 /* 381 * This function is called on attach to build an initial view of the 382 * interfaces on the system. It will be called once for IPv4 and once 383 * for IPv6, although there is only one ipnet interface for both IPv4 384 * and IPv6 there are separate address lists. 
385 */ 386 static int 387 ipnet_populate_if(net_handle_t nd, ipnet_stack_t *ips, boolean_t isv6) 388 { 389 phy_if_t phyif; 390 lif_if_t lif; 391 ipnetif_t *ipnetif; 392 char name[LIFNAMSIZ]; 393 boolean_t new_if = B_FALSE; 394 uint64_t ifflags; 395 int ret = 0; 396 397 /* 398 * If ipnet_register_netihook() was unable to initialize this 399 * stack's net_handle_t, then we cannot populate any interface 400 * information. This usually happens when we attempted to 401 * grab a net_handle_t as a stack was shutting down. We don't 402 * want to fail the entire _init() operation because of a 403 * stack shutdown (other stacks will continue to work just 404 * fine), so we silently return success here. 405 */ 406 if (nd == NULL) 407 return (0); 408 409 /* 410 * Make sure we're not processing NIC events during the 411 * population of our interfaces and address lists. 412 */ 413 mutex_enter(&ips->ips_event_lock); 414 415 for (phyif = net_phygetnext(nd, 0); phyif != 0; 416 phyif = net_phygetnext(nd, phyif)) { 417 if (net_getifname(nd, phyif, name, LIFNAMSIZ) != 0) 418 continue; 419 ifflags = 0; 420 (void) net_getlifflags(nd, phyif, 0, &ifflags); 421 if ((ipnetif = ipnetif_getby_index(phyif, ips)) == NULL) { 422 ipnetif = ipnetif_create(name, phyif, ips, ifflags); 423 if (ipnetif == NULL) { 424 ret = ENOMEM; 425 goto done; 426 } 427 new_if = B_TRUE; 428 } 429 ipnetif->if_flags |= 430 isv6 ? IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED; 431 432 for (lif = net_lifgetnext(nd, phyif, 0); lif != 0; 433 lif = net_lifgetnext(nd, phyif, lif)) { 434 /* 435 * Skip addresses that aren't up. We'll add 436 * them when we receive an NE_LIF_UP event. 437 */ 438 if (net_getlifflags(nd, phyif, lif, &ifflags) != 0 || 439 !(ifflags & IFF_UP)) 440 continue; 441 /* Don't add it if we already have it. 
*/ 442 if (ipnet_match_lif(ipnetif, lif, isv6) != NULL) 443 continue; 444 ipnet_add_ifaddr(lif, ipnetif, nd); 445 } 446 if (!new_if) 447 ipnetif_refrele(ipnetif); 448 } 449 450 done: 451 mutex_exit(&ips->ips_event_lock); 452 return (ret); 453 } 454 455 static int 456 ipnet_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 457 { 458 if (cmd != DDI_ATTACH) 459 return (DDI_FAILURE); 460 461 if (ddi_create_minor_node(dip, "lo0", S_IFCHR, IPNET_MINOR_LO, 462 DDI_PSEUDO, 0) == DDI_FAILURE) 463 return (DDI_FAILURE); 464 465 ipnet_dip = dip; 466 return (DDI_SUCCESS); 467 } 468 469 static int 470 ipnet_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 471 { 472 if (cmd != DDI_DETACH) 473 return (DDI_FAILURE); 474 475 ASSERT(dip == ipnet_dip); 476 ddi_remove_minor_node(ipnet_dip, NULL); 477 ipnet_dip = NULL; 478 return (DDI_SUCCESS); 479 } 480 481 /* ARGSUSED */ 482 static int 483 ipnet_devinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 484 { 485 int error = DDI_FAILURE; 486 487 switch (infocmd) { 488 case DDI_INFO_DEVT2INSTANCE: 489 *result = (void *)0; 490 error = DDI_SUCCESS; 491 break; 492 case DDI_INFO_DEVT2DEVINFO: 493 if (ipnet_dip != NULL) { 494 *result = ipnet_dip; 495 error = DDI_SUCCESS; 496 } 497 break; 498 } 499 return (error); 500 } 501 502 /* ARGSUSED */ 503 static int 504 ipnet_open(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp) 505 { 506 ipnet_t *ipnet; 507 netstack_t *ns = NULL; 508 ipnet_stack_t *ips; 509 int err = 0; 510 zoneid_t zoneid = crgetzoneid(crp); 511 512 /* 513 * If the system is labeled, only the global zone is allowed to open 514 * IP observability nodes. 515 */ 516 if (is_system_labeled() && zoneid != GLOBAL_ZONEID) 517 return (EACCES); 518 519 /* We don't support open as a module */ 520 if (sflag & MODOPEN) 521 return (ENOTSUP); 522 523 /* This driver is self-cloning, we don't support re-open. 
*/ 524 if (rq->q_ptr != NULL) 525 return (EBUSY); 526 527 if ((ipnet = kmem_zalloc(sizeof (*ipnet), KM_NOSLEEP)) == NULL) 528 return (ENOMEM); 529 530 VERIFY((ns = netstack_find_by_cred(crp)) != NULL); 531 ips = ns->netstack_ipnet; 532 533 rq->q_ptr = WR(rq)->q_ptr = ipnet; 534 ipnet->ipnet_rq = rq; 535 ipnet->ipnet_minor = (minor_t)id_alloc(ipnet_minor_space); 536 ipnet->ipnet_zoneid = zoneid; 537 ipnet->ipnet_dlstate = DL_UNBOUND; 538 ipnet->ipnet_ns = ns; 539 540 /* 541 * We need to hold ips_event_lock here as any NE_LIF_DOWN events need 542 * to be processed after ipnet_if is set and the ipnet_t has been 543 * inserted in the ips_str_list. 544 */ 545 mutex_enter(&ips->ips_event_lock); 546 if (getminor(*dev) == IPNET_MINOR_LO) { 547 ipnet->ipnet_flags |= IPNET_LOMODE; 548 ipnet->ipnet_acceptfn = ipnet_loaccept; 549 } else { 550 ipnet->ipnet_acceptfn = ipnet_accept; 551 ipnet->ipnet_if = ipnetif_getby_dev(*dev, ips); 552 if (ipnet->ipnet_if == NULL || 553 !ipnetif_in_zone(ipnet->ipnet_if, zoneid, ips)) { 554 err = ENODEV; 555 goto done; 556 } 557 } 558 559 mutex_enter(&ips->ips_walkers_lock); 560 while (ips->ips_walkers_cnt != 0) 561 cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock); 562 list_insert_head(&ips->ips_str_list, ipnet); 563 *dev = makedevice(getmajor(*dev), ipnet->ipnet_minor); 564 qprocson(rq); 565 566 /* 567 * Only register our callback if we're the first open client; we call 568 * unregister in close() for the last open client. 
569 */ 570 if (list_head(&ips->ips_str_list) == list_tail(&ips->ips_str_list)) 571 ips->ips_hook = ipobs_register_hook(ns, ipnet_input); 572 mutex_exit(&ips->ips_walkers_lock); 573 574 done: 575 mutex_exit(&ips->ips_event_lock); 576 if (err != 0) { 577 netstack_rele(ns); 578 id_free(ipnet_minor_space, ipnet->ipnet_minor); 579 if (ipnet->ipnet_if != NULL) 580 ipnetif_refrele(ipnet->ipnet_if); 581 kmem_free(ipnet, sizeof (*ipnet)); 582 } 583 return (err); 584 } 585 586 static int 587 ipnet_close(queue_t *rq) 588 { 589 ipnet_t *ipnet = rq->q_ptr; 590 ipnet_stack_t *ips = ipnet->ipnet_ns->netstack_ipnet; 591 592 if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS) 593 ipnet_leave_allmulti(ipnet->ipnet_if, ips); 594 if (ipnet->ipnet_flags & IPNET_PROMISC_MULTI) 595 ipnet_leave_allmulti(ipnet->ipnet_if, ips); 596 597 mutex_enter(&ips->ips_walkers_lock); 598 while (ips->ips_walkers_cnt != 0) 599 cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock); 600 601 qprocsoff(rq); 602 603 list_remove(&ips->ips_str_list, ipnet); 604 if (ipnet->ipnet_if != NULL) 605 ipnetif_refrele(ipnet->ipnet_if); 606 id_free(ipnet_minor_space, ipnet->ipnet_minor); 607 608 if (list_is_empty(&ips->ips_str_list)) { 609 ipobs_unregister_hook(ips->ips_netstack, ips->ips_hook); 610 ips->ips_hook = NULL; 611 } 612 613 kmem_free(ipnet, sizeof (*ipnet)); 614 615 mutex_exit(&ips->ips_walkers_lock); 616 netstack_rele(ips->ips_netstack); 617 return (0); 618 } 619 620 static int 621 ipnet_wput(queue_t *q, mblk_t *mp) 622 { 623 switch (mp->b_datap->db_type) { 624 case M_FLUSH: 625 if (*mp->b_rptr & FLUSHW) { 626 flushq(q, FLUSHDATA); 627 *mp->b_rptr &= ~FLUSHW; 628 } 629 if (*mp->b_rptr & FLUSHR) 630 qreply(q, mp); 631 else 632 freemsg(mp); 633 break; 634 case M_PROTO: 635 case M_PCPROTO: 636 ipnet_wputnondata(q, mp); 637 break; 638 case M_IOCTL: 639 ipnet_ioctl(q, mp); 640 break; 641 case M_IOCDATA: 642 ipnet_iocdata(q, mp); 643 break; 644 default: 645 freemsg(mp); 646 break; 647 } 648 return (0); 649 } 650 651 
static int 652 ipnet_rsrv(queue_t *q) 653 { 654 mblk_t *mp; 655 656 while ((mp = getq(q)) != NULL) { 657 ASSERT(DB_TYPE(mp) == M_DATA); 658 if (canputnext(q)) { 659 putnext(q, mp); 660 } else { 661 (void) putbq(q, mp); 662 break; 663 } 664 } 665 return (0); 666 } 667 668 static void 669 ipnet_ioctl(queue_t *q, mblk_t *mp) 670 { 671 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 672 673 switch (iocp->ioc_cmd) { 674 case DLIOCRAW: 675 miocack(q, mp, 0, 0); 676 break; 677 case DLIOCIPNETINFO: 678 if (iocp->ioc_count == TRANSPARENT) { 679 mcopyin(mp, NULL, sizeof (uint_t), NULL); 680 qreply(q, mp); 681 break; 682 } 683 /* Fallthrough, we don't support I_STR with DLIOCIPNETINFO. */ 684 default: 685 miocnak(q, mp, 0, EINVAL); 686 break; 687 } 688 } 689 690 static void 691 ipnet_iocdata(queue_t *q, mblk_t *mp) 692 { 693 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 694 ipnet_t *ipnet = q->q_ptr; 695 696 switch (iocp->ioc_cmd) { 697 case DLIOCIPNETINFO: 698 if (*(int *)mp->b_cont->b_rptr == 1) 699 ipnet->ipnet_flags |= IPNET_INFO; 700 else if (*(int *)mp->b_cont->b_rptr == 0) 701 ipnet->ipnet_flags &= ~IPNET_INFO; 702 else 703 goto iocnak; 704 miocack(q, mp, 0, DL_IPNETINFO_VERSION); 705 break; 706 default: 707 iocnak: 708 miocnak(q, mp, 0, EINVAL); 709 break; 710 } 711 } 712 713 static void 714 ipnet_wputnondata(queue_t *q, mblk_t *mp) 715 { 716 union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr; 717 t_uscalar_t prim = dlp->dl_primitive; 718 719 switch (prim) { 720 case DL_INFO_REQ: 721 ipnet_inforeq(q, mp); 722 break; 723 case DL_UNBIND_REQ: 724 ipnet_unbindreq(q, mp); 725 break; 726 case DL_BIND_REQ: 727 ipnet_bindreq(q, mp); 728 break; 729 case DL_PROMISCON_REQ: 730 ipnet_dlpromisconreq(q, mp); 731 break; 732 case DL_PROMISCOFF_REQ: 733 ipnet_dlpromiscoffreq(q, mp); 734 break; 735 case DL_UNITDATA_REQ: 736 case DL_DETACH_REQ: 737 case DL_PHYS_ADDR_REQ: 738 case DL_SET_PHYS_ADDR_REQ: 739 case DL_ENABMULTI_REQ: 740 case DL_DISABMULTI_REQ: 741 case 
DL_ATTACH_REQ: 742 dlerrorack(q, mp, prim, DL_UNSUPPORTED, 0); 743 break; 744 default: 745 dlerrorack(q, mp, prim, DL_BADPRIM, 0); 746 break; 747 } 748 } 749 750 static void 751 ipnet_inforeq(queue_t *q, mblk_t *mp) 752 { 753 dl_info_ack_t *dlip; 754 size_t size = sizeof (dl_info_ack_t) + sizeof (ushort_t); 755 756 if (MBLKL(mp) < DL_INFO_REQ_SIZE) { 757 dlerrorack(q, mp, DL_INFO_REQ, DL_BADPRIM, 0); 758 return; 759 } 760 761 if ((mp = mexchange(q, mp, size, M_PCPROTO, DL_INFO_ACK)) == NULL) 762 return; 763 764 dlip = (dl_info_ack_t *)mp->b_rptr; 765 *dlip = ipnet_infoack; 766 qreply(q, mp); 767 } 768 769 static void 770 ipnet_bindreq(queue_t *q, mblk_t *mp) 771 { 772 union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr; 773 ipnet_t *ipnet = q->q_ptr; 774 775 if (MBLKL(mp) < DL_BIND_REQ_SIZE) { 776 dlerrorack(q, mp, DL_BIND_REQ, DL_BADPRIM, 0); 777 return; 778 } 779 780 switch (dlp->bind_req.dl_sap) { 781 case 0 : 782 ipnet->ipnet_family = AF_UNSPEC; 783 break; 784 case IPV4_VERSION : 785 ipnet->ipnet_family = AF_INET; 786 break; 787 case IPV6_VERSION : 788 ipnet->ipnet_family = AF_INET6; 789 break; 790 default : 791 dlerrorack(q, mp, DL_BIND_REQ, DL_BADSAP, 0); 792 return; 793 /*NOTREACHED*/ 794 } 795 796 ipnet->ipnet_dlstate = DL_IDLE; 797 dlbindack(q, mp, dlp->bind_req.dl_sap, 0, 0, 0, 0); 798 } 799 800 static void 801 ipnet_unbindreq(queue_t *q, mblk_t *mp) 802 { 803 ipnet_t *ipnet = q->q_ptr; 804 805 if (MBLKL(mp) < DL_UNBIND_REQ_SIZE) { 806 dlerrorack(q, mp, DL_UNBIND_REQ, DL_BADPRIM, 0); 807 return; 808 } 809 810 if (ipnet->ipnet_dlstate != DL_IDLE) { 811 dlerrorack(q, mp, DL_UNBIND_REQ, DL_OUTSTATE, 0); 812 } else { 813 ipnet->ipnet_dlstate = DL_UNBOUND; 814 ipnet->ipnet_family = AF_UNSPEC; 815 dlokack(q, mp, DL_UNBIND_REQ); 816 } 817 } 818 819 static void 820 ipnet_dlpromisconreq(queue_t *q, mblk_t *mp) 821 { 822 ipnet_t *ipnet = q->q_ptr; 823 t_uscalar_t level; 824 int err; 825 826 if (MBLKL(mp) < DL_PROMISCON_REQ_SIZE) { 827 dlerrorack(q, mp, 
DL_PROMISCON_REQ, DL_BADPRIM, 0); 828 return; 829 } 830 831 if (ipnet->ipnet_flags & IPNET_LOMODE) { 832 dlokack(q, mp, DL_PROMISCON_REQ); 833 return; 834 } 835 836 level = ((dl_promiscon_req_t *)mp->b_rptr)->dl_level; 837 if (level == DL_PROMISC_PHYS || level == DL_PROMISC_MULTI) { 838 if ((err = ipnet_join_allmulti(ipnet->ipnet_if, 839 ipnet->ipnet_ns->netstack_ipnet)) != 0) { 840 dlerrorack(q, mp, DL_PROMISCON_REQ, DL_SYSERR, err); 841 return; 842 } 843 } 844 845 switch (level) { 846 case DL_PROMISC_PHYS: 847 ipnet->ipnet_flags |= IPNET_PROMISC_PHYS; 848 break; 849 case DL_PROMISC_SAP: 850 ipnet->ipnet_flags |= IPNET_PROMISC_SAP; 851 break; 852 case DL_PROMISC_MULTI: 853 ipnet->ipnet_flags |= IPNET_PROMISC_MULTI; 854 break; 855 default: 856 dlerrorack(q, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0); 857 return; 858 } 859 860 dlokack(q, mp, DL_PROMISCON_REQ); 861 } 862 863 static void 864 ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp) 865 { 866 ipnet_t *ipnet = q->q_ptr; 867 t_uscalar_t level; 868 uint16_t orig_ipnet_flags = ipnet->ipnet_flags; 869 870 if (MBLKL(mp) < DL_PROMISCOFF_REQ_SIZE) { 871 dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0); 872 return; 873 } 874 875 if (ipnet->ipnet_flags & IPNET_LOMODE) { 876 dlokack(q, mp, DL_PROMISCOFF_REQ); 877 return; 878 } 879 880 level = ((dl_promiscon_req_t *)mp->b_rptr)->dl_level; 881 switch (level) { 882 case DL_PROMISC_PHYS: 883 if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS) 884 ipnet->ipnet_flags &= ~IPNET_PROMISC_PHYS; 885 break; 886 case DL_PROMISC_SAP: 887 if (ipnet->ipnet_flags & IPNET_PROMISC_SAP) 888 ipnet->ipnet_flags &= ~IPNET_PROMISC_SAP; 889 break; 890 case DL_PROMISC_MULTI: 891 if (ipnet->ipnet_flags & IPNET_PROMISC_MULTI) 892 ipnet->ipnet_flags &= ~IPNET_PROMISC_MULTI; 893 break; 894 default: 895 dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0); 896 return; 897 } 898 899 if (orig_ipnet_flags == ipnet->ipnet_flags) { 900 dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_NOTENAB, 0); 901 return; 902 } 903 904 if 
(level == DL_PROMISC_PHYS || level == DL_PROMISC_MULTI) { 905 ipnet_leave_allmulti(ipnet->ipnet_if, 906 ipnet->ipnet_ns->netstack_ipnet); 907 } 908 909 dlokack(q, mp, DL_PROMISCOFF_REQ); 910 } 911 912 static int 913 ipnet_join_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips) 914 { 915 int err = 0; 916 ip_stack_t *ipst = ips->ips_netstack->netstack_ip; 917 uint64_t index = ipnetif->if_index; 918 919 mutex_enter(&ips->ips_event_lock); 920 if (ipnetif->if_multicnt == 0) { 921 ASSERT((ipnetif->if_flags & 922 (IPNETIF_IPV4ALLMULTI | IPNETIF_IPV6ALLMULTI)) == 0); 923 if (ipnetif->if_flags & IPNETIF_IPV4PLUMBED) { 924 err = ip_join_allmulti(index, B_FALSE, ipst); 925 if (err != 0) 926 goto done; 927 ipnetif->if_flags |= IPNETIF_IPV4ALLMULTI; 928 } 929 if (ipnetif->if_flags & IPNETIF_IPV6PLUMBED) { 930 err = ip_join_allmulti(index, B_TRUE, ipst); 931 if (err != 0 && 932 (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI)) { 933 (void) ip_leave_allmulti(index, B_FALSE, ipst); 934 ipnetif->if_flags &= ~IPNETIF_IPV4ALLMULTI; 935 goto done; 936 } 937 ipnetif->if_flags |= IPNETIF_IPV6ALLMULTI; 938 } 939 } 940 ipnetif->if_multicnt++; 941 942 done: 943 mutex_exit(&ips->ips_event_lock); 944 return (err); 945 } 946 947 static void 948 ipnet_leave_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips) 949 { 950 int err; 951 ip_stack_t *ipst = ips->ips_netstack->netstack_ip; 952 uint64_t index = ipnetif->if_index; 953 954 mutex_enter(&ips->ips_event_lock); 955 ASSERT(ipnetif->if_multicnt != 0); 956 if (--ipnetif->if_multicnt == 0) { 957 if (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI) { 958 err = ip_leave_allmulti(index, B_FALSE, ipst); 959 ASSERT(err == 0 || err == ENODEV); 960 ipnetif->if_flags &= ~IPNETIF_IPV4ALLMULTI; 961 } 962 if (ipnetif->if_flags & IPNETIF_IPV6ALLMULTI) { 963 err = ip_leave_allmulti(index, B_TRUE, ipst); 964 ASSERT(err == 0 || err == ENODEV); 965 ipnetif->if_flags &= ~IPNETIF_IPV6ALLMULTI; 966 } 967 } 968 mutex_exit(&ips->ips_event_lock); 969 } 970 971 /* 972 * Allocate a new 
mblk_t and put a dl_ipnetinfo_t in it. 973 * The structure it copies the header information from, 974 * hook_pkt_observe_t, is constructed using network byte 975 * order in ipobs_hook(), so there is no conversion here. 976 */ 977 static mblk_t * 978 ipnet_addheader(hook_pkt_observe_t *hdr, mblk_t *mp) 979 { 980 mblk_t *dlhdr; 981 dl_ipnetinfo_t *dl; 982 983 if ((dlhdr = allocb(sizeof (dl_ipnetinfo_t), BPRI_HI)) == NULL) { 984 freemsg(mp); 985 return (NULL); 986 } 987 dl = (dl_ipnetinfo_t *)dlhdr->b_rptr; 988 dl->dli_version = DL_IPNETINFO_VERSION; 989 dl->dli_family = hdr->hpo_family; 990 dl->dli_htype = hdr->hpo_htype; 991 dl->dli_pktlen = hdr->hpo_pktlen; 992 dl->dli_ifindex = hdr->hpo_ifindex; 993 dl->dli_grifindex = hdr->hpo_grifindex; 994 dl->dli_zsrc = hdr->hpo_zsrc; 995 dl->dli_zdst = hdr->hpo_zdst; 996 dlhdr->b_wptr += sizeof (*dl); 997 dlhdr->b_cont = mp; 998 999 return (dlhdr); 1000 } 1001 1002 static ipnet_addrtype_t 1003 ipnet_get_addrtype(ipnet_t *ipnet, ipnet_addrp_t *addr) 1004 { 1005 list_t *list; 1006 ipnetif_t *ipnetif = ipnet->ipnet_if; 1007 ipnetif_addr_t *ifaddr; 1008 ipnet_addrtype_t addrtype = IPNETADDR_UNKNOWN; 1009 1010 /* First check if the address is multicast or limited broadcast. */ 1011 switch (addr->iap_family) { 1012 case AF_INET: 1013 if (CLASSD(*(addr->iap_addr4)) || 1014 *(addr->iap_addr4) == INADDR_BROADCAST) 1015 return (IPNETADDR_MBCAST); 1016 break; 1017 case AF_INET6: 1018 if (IN6_IS_ADDR_MULTICAST(addr->iap_addr6)) 1019 return (IPNETADDR_MBCAST); 1020 break; 1021 } 1022 1023 /* 1024 * Walk the address list to see if the address belongs to our 1025 * interface or is one of our subnet broadcast addresses. 1026 */ 1027 mutex_enter(&ipnetif->if_addr_lock); 1028 list = (addr->iap_family == AF_INET) ? 
1029 &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list; 1030 for (ifaddr = list_head(list); 1031 ifaddr != NULL && addrtype == IPNETADDR_UNKNOWN; 1032 ifaddr = list_next(list, ifaddr)) { 1033 /* 1034 * If we're not in the global zone, then only look at 1035 * addresses in our zone. 1036 */ 1037 if (ipnet->ipnet_zoneid != GLOBAL_ZONEID && 1038 ipnet->ipnet_zoneid != ifaddr->ifa_zone) 1039 continue; 1040 switch (addr->iap_family) { 1041 case AF_INET: 1042 if (ifaddr->ifa_ip4addr != INADDR_ANY && 1043 *(addr->iap_addr4) == ifaddr->ifa_ip4addr) 1044 addrtype = IPNETADDR_MYADDR; 1045 else if (ifaddr->ifa_brdaddr != INADDR_ANY && 1046 *(addr->iap_addr4) == ifaddr->ifa_brdaddr) 1047 addrtype = IPNETADDR_MBCAST; 1048 break; 1049 case AF_INET6: 1050 if (IN6_ARE_ADDR_EQUAL(addr->iap_addr6, 1051 &ifaddr->ifa_ip6addr)) 1052 addrtype = IPNETADDR_MYADDR; 1053 break; 1054 } 1055 } 1056 mutex_exit(&ipnetif->if_addr_lock); 1057 1058 return (addrtype); 1059 } 1060 1061 /* 1062 * Verify if the packet contained in hdr should be passed up to the 1063 * ipnet client stream. 1064 */ 1065 static boolean_t 1066 ipnet_accept(ipnet_t *ipnet, hook_pkt_observe_t *hdr, ipnet_addrp_t *src, 1067 ipnet_addrp_t *dst) 1068 { 1069 boolean_t obsif; 1070 uint64_t ifindex = ipnet->ipnet_if->if_index; 1071 ipnet_addrtype_t srctype; 1072 ipnet_addrtype_t dsttype; 1073 1074 srctype = ipnet_get_addrtype(ipnet, src); 1075 dsttype = ipnet_get_addrtype(ipnet, dst); 1076 1077 /* 1078 * If the packet's ifindex matches ours, or the packet's group ifindex 1079 * matches ours, it's on the interface we're observing. (Thus, 1080 * observing on the group ifindex matches all ifindexes in the group.) 
1081 */ 1082 obsif = (ntohl(hdr->hpo_ifindex) == ifindex || 1083 ntohl(hdr->hpo_grifindex) == ifindex); 1084 1085 DTRACE_PROBE5(ipnet_accept__addr, 1086 ipnet_addrtype_t, srctype, ipnet_addrp_t *, src, 1087 ipnet_addrtype_t, dsttype, ipnet_addrp_t *, dst, 1088 boolean_t, obsif); 1089 1090 /* 1091 * Do not allow an ipnet stream to see packets that are not from or to 1092 * its zone. The exception is when zones are using the shared stack 1093 * model. In this case, streams in the global zone have visibility 1094 * into other shared-stack zones, and broadcast and multicast traffic 1095 * is visible by all zones in the stack. 1096 */ 1097 if (ipnet->ipnet_zoneid != GLOBAL_ZONEID && 1098 dsttype != IPNETADDR_MBCAST) { 1099 if (ipnet->ipnet_zoneid != ntohl(hdr->hpo_zsrc) && 1100 ipnet->ipnet_zoneid != ntohl(hdr->hpo_zdst)) 1101 return (B_FALSE); 1102 } 1103 1104 /* 1105 * If DL_PROMISC_SAP isn't enabled, then the bound SAP must match the 1106 * packet's IP version. 1107 */ 1108 if (!(ipnet->ipnet_flags & IPNET_PROMISC_SAP) && 1109 ipnet->ipnet_family != hdr->hpo_family) 1110 return (B_FALSE); 1111 1112 /* If the destination address is ours, then accept the packet. */ 1113 if (dsttype == IPNETADDR_MYADDR) 1114 return (B_TRUE); 1115 1116 /* 1117 * If DL_PROMISC_PHYS is enabled, then we can see all packets that are 1118 * sent or received on the interface we're observing, or packets that 1119 * have our source address (this allows us to see packets we send). 1120 */ 1121 if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS) { 1122 if (srctype == IPNETADDR_MYADDR || obsif) 1123 return (B_TRUE); 1124 } 1125 1126 /* 1127 * We accept multicast and broadcast packets transmitted or received 1128 * on the interface we're observing. 1129 */ 1130 if (dsttype == IPNETADDR_MBCAST && obsif) 1131 return (B_TRUE); 1132 1133 return (B_FALSE); 1134 } 1135 1136 /* 1137 * Verify if the packet contained in hdr should be passed up to the ipnet 1138 * client stream that's in IPNET_LOMODE. 
1139 */ 1140 /* ARGSUSED */ 1141 static boolean_t 1142 ipnet_loaccept(ipnet_t *ipnet, hook_pkt_observe_t *hdr, ipnet_addrp_t *src, 1143 ipnet_addrp_t *dst) 1144 { 1145 if (hdr->hpo_htype != IPOBS_HOOK_LOCAL) { 1146 /* 1147 * ipnet_if is only NULL for IPNET_MINOR_LO devices. 1148 */ 1149 if (ipnet->ipnet_if == NULL) 1150 return (B_FALSE); 1151 } 1152 1153 /* 1154 * An ipnet stream must not see packets that are not from/to its zone. 1155 */ 1156 if (ipnet->ipnet_zoneid != GLOBAL_ZONEID) { 1157 if (ipnet->ipnet_zoneid != ntohl(hdr->hpo_zsrc) && 1158 ipnet->ipnet_zoneid != ntohl(hdr->hpo_zdst)) 1159 return (B_FALSE); 1160 } 1161 1162 return (ipnet->ipnet_family == AF_UNSPEC || 1163 ipnet->ipnet_family == hdr->hpo_family); 1164 } 1165 1166 static void 1167 ipnet_dispatch(void *arg) 1168 { 1169 mblk_t *mp = arg; 1170 hook_pkt_observe_t *hdr = (hook_pkt_observe_t *)mp->b_rptr; 1171 ipnet_t *ipnet; 1172 mblk_t *netmp; 1173 list_t *list; 1174 ipnet_stack_t *ips; 1175 ipnet_addrp_t src; 1176 ipnet_addrp_t dst; 1177 1178 ips = ((netstack_t *)hdr->hpo_ctx)->netstack_ipnet; 1179 1180 netmp = hdr->hpo_pkt->b_cont; 1181 src.iap_family = hdr->hpo_family; 1182 dst.iap_family = hdr->hpo_family; 1183 1184 if (hdr->hpo_family == AF_INET) { 1185 src.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_src; 1186 dst.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_dst; 1187 } else { 1188 src.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_src; 1189 dst.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_dst; 1190 } 1191 1192 ipnet_walkers_inc(ips); 1193 1194 list = &ips->ips_str_list; 1195 for (ipnet = list_head(list); ipnet != NULL; 1196 ipnet = list_next(list, ipnet)) { 1197 if (!(*ipnet->ipnet_acceptfn)(ipnet, hdr, &src, &dst)) { 1198 IPSK_BUMP(ips, ik_acceptFail); 1199 continue; 1200 } 1201 IPSK_BUMP(ips, ik_acceptOk); 1202 1203 if (list_next(list, ipnet) == NULL) { 1204 netmp = hdr->hpo_pkt->b_cont; 1205 hdr->hpo_pkt->b_cont = NULL; 1206 } else { 1207 if ((netmp = dupmsg(hdr->hpo_pkt->b_cont)) == 
NULL && 1208 (netmp = copymsg(hdr->hpo_pkt->b_cont)) == NULL) { 1209 IPSK_BUMP(ips, ik_duplicationFail); 1210 continue; 1211 } 1212 } 1213 1214 if (ipnet->ipnet_flags & IPNET_INFO) { 1215 if ((netmp = ipnet_addheader(hdr, netmp)) == NULL) { 1216 IPSK_BUMP(ips, ik_dispatchHeaderDrop); 1217 continue; 1218 } 1219 } 1220 1221 if (ipnet->ipnet_rq->q_first == NULL && 1222 canputnext(ipnet->ipnet_rq)) { 1223 putnext(ipnet->ipnet_rq, netmp); 1224 IPSK_BUMP(ips, ik_dispatchDeliver); 1225 } else if (canput(ipnet->ipnet_rq)) { 1226 (void) putq(ipnet->ipnet_rq, netmp); 1227 IPSK_BUMP(ips, ik_dispatchDeliver); 1228 } else { 1229 freemsg(netmp); 1230 IPSK_BUMP(ips, ik_dispatchPutDrop); 1231 } 1232 } 1233 1234 ipnet_walkers_dec(ips); 1235 1236 freemsg(mp); 1237 } 1238 1239 static void 1240 ipnet_input(mblk_t *mp) 1241 { 1242 hook_pkt_observe_t *hdr = (hook_pkt_observe_t *)mp->b_rptr; 1243 ipnet_stack_t *ips; 1244 1245 ips = ((netstack_t *)hdr->hpo_ctx)->netstack_ipnet; 1246 1247 if (ddi_taskq_dispatch(ipnet_taskq, ipnet_dispatch, mp, DDI_NOSLEEP) != 1248 DDI_SUCCESS) { 1249 IPSK_BUMP(ips, ik_dispatchFail); 1250 freemsg(mp); 1251 } else { 1252 IPSK_BUMP(ips, ik_dispatchOk); 1253 } 1254 } 1255 1256 static ipnetif_t * 1257 ipnet_alloc_if(ipnet_stack_t *ips) 1258 { 1259 ipnetif_t *ipnetif; 1260 1261 if ((ipnetif = kmem_zalloc(sizeof (*ipnetif), KM_NOSLEEP)) == NULL) 1262 return (NULL); 1263 1264 mutex_init(&ipnetif->if_addr_lock, NULL, MUTEX_DEFAULT, 0); 1265 list_create(&ipnetif->if_ip4addr_list, sizeof (ipnetif_addr_t), 1266 offsetof(ipnetif_addr_t, ifa_link)); 1267 list_create(&ipnetif->if_ip6addr_list, sizeof (ipnetif_addr_t), 1268 offsetof(ipnetif_addr_t, ifa_link)); 1269 mutex_init(&ipnetif->if_reflock, NULL, MUTEX_DEFAULT, 0); 1270 1271 ipnetif->if_stackp = ips; 1272 1273 return (ipnetif); 1274 } 1275 1276 /* 1277 * Create a new ipnetif_t and new minor node for it. 
If creation is 1278 * successful the new ipnetif_t is inserted into an avl_tree 1279 * containing ipnetif's for this stack instance. 1280 */ 1281 static ipnetif_t * 1282 ipnetif_create(const char *name, uint64_t index, ipnet_stack_t *ips, 1283 uint64_t ifflags) 1284 { 1285 ipnetif_t *ipnetif; 1286 avl_index_t where = 0; 1287 minor_t ifminor; 1288 1289 /* 1290 * Because ipnetif_create() can be called from a NIC event 1291 * callback, it should not block. 1292 */ 1293 ifminor = (minor_t)id_alloc_nosleep(ipnet_minor_space); 1294 if (ifminor == (minor_t)-1) 1295 return (NULL); 1296 if ((ipnetif = ipnet_alloc_if(ips)) == NULL) { 1297 id_free(ipnet_minor_space, ifminor); 1298 return (NULL); 1299 } 1300 1301 (void) strlcpy(ipnetif->if_name, name, LIFNAMSIZ); 1302 ipnetif->if_index = (uint_t)index; 1303 ipnetif->if_zoneid = netstack_get_zoneid(ips->ips_netstack); 1304 ipnetif->if_dev = makedevice(ipnet_major, ifminor); 1305 1306 ipnetif->if_refcnt = 1; 1307 if ((ifflags & IFF_LOOPBACK) != 0) 1308 ipnetif->if_flags = IPNETIF_LOOPBACK; 1309 1310 mutex_enter(&ips->ips_avl_lock); 1311 VERIFY(avl_find(&ips->ips_avl_by_index, &index, &where) == NULL); 1312 avl_insert(&ips->ips_avl_by_index, ipnetif, where); 1313 VERIFY(avl_find(&ips->ips_avl_by_name, (void *)name, &where) == NULL); 1314 avl_insert(&ips->ips_avl_by_name, ipnetif, where); 1315 mutex_exit(&ips->ips_avl_lock); 1316 1317 return (ipnetif); 1318 } 1319 1320 static void 1321 ipnetif_remove(ipnetif_t *ipnetif, ipnet_stack_t *ips) 1322 { 1323 ipnet_t *ipnet; 1324 1325 ipnet_walkers_inc(ips); 1326 /* Send a SIGHUP to all open streams associated with this ipnetif. 
*/ 1327 for (ipnet = list_head(&ips->ips_str_list); ipnet != NULL; 1328 ipnet = list_next(&ips->ips_str_list, ipnet)) { 1329 if (ipnet->ipnet_if == ipnetif) 1330 (void) putnextctl(ipnet->ipnet_rq, M_HANGUP); 1331 } 1332 ipnet_walkers_dec(ips); 1333 mutex_enter(&ips->ips_avl_lock); 1334 avl_remove(&ips->ips_avl_by_index, ipnetif); 1335 avl_remove(&ips->ips_avl_by_name, ipnetif); 1336 mutex_exit(&ips->ips_avl_lock); 1337 /* 1338 * Release the reference we implicitly held in ipnetif_create(). 1339 */ 1340 ipnetif_refrele(ipnetif); 1341 } 1342 1343 static void 1344 ipnet_purge_addrlist(list_t *addrlist) 1345 { 1346 ipnetif_addr_t *ifa; 1347 1348 while ((ifa = list_head(addrlist)) != NULL) { 1349 list_remove(addrlist, ifa); 1350 if (ifa->ifa_shared != NULL) 1351 ipnetif_clone_release(ifa->ifa_shared); 1352 kmem_free(ifa, sizeof (*ifa)); 1353 } 1354 } 1355 1356 static void 1357 ipnetif_free(ipnetif_t *ipnetif) 1358 { 1359 ASSERT(ipnetif->if_refcnt == 0); 1360 ASSERT(ipnetif->if_sharecnt == 0); 1361 1362 /* Remove IPv4/v6 address lists from the ipnetif */ 1363 ipnet_purge_addrlist(&ipnetif->if_ip4addr_list); 1364 list_destroy(&ipnetif->if_ip4addr_list); 1365 ipnet_purge_addrlist(&ipnetif->if_ip6addr_list); 1366 list_destroy(&ipnetif->if_ip6addr_list); 1367 mutex_destroy(&ipnetif->if_addr_lock); 1368 mutex_destroy(&ipnetif->if_reflock); 1369 if (ipnetif->if_dev != 0) 1370 id_free(ipnet_minor_space, getminor(ipnetif->if_dev)); 1371 kmem_free(ipnetif, sizeof (*ipnetif)); 1372 } 1373 1374 /* 1375 * Create an ipnetif_addr_t with the given logical interface id (lif) 1376 * and add it to the supplied ipnetif. The lif is the netinfo 1377 * representation of logical interface id, and we use this id to match 1378 * incoming netinfo events against our lists of addresses. 
1379 */ 1380 static void 1381 ipnet_add_ifaddr(uint64_t lif, ipnetif_t *ipnetif, net_handle_t nd) 1382 { 1383 ipnetif_addr_t *ifaddr; 1384 zoneid_t zoneid; 1385 struct sockaddr_in bcast; 1386 struct sockaddr_storage addr; 1387 net_ifaddr_t type = NA_ADDRESS; 1388 uint64_t phyif = ipnetif->if_index; 1389 1390 if (net_getlifaddr(nd, phyif, lif, 1, &type, &addr) != 0 || 1391 net_getlifzone(nd, phyif, lif, &zoneid) != 0) 1392 return; 1393 1394 if ((ifaddr = kmem_alloc(sizeof (*ifaddr), KM_NOSLEEP)) == NULL) 1395 return; 1396 ifaddr->ifa_zone = zoneid; 1397 ifaddr->ifa_id = lif; 1398 ifaddr->ifa_shared = NULL; 1399 1400 switch (addr.ss_family) { 1401 case AF_INET: 1402 ifaddr->ifa_ip4addr = 1403 ((struct sockaddr_in *)&addr)->sin_addr.s_addr; 1404 /* 1405 * Try and get the broadcast address. Note that it's okay for 1406 * an interface to not have a broadcast address, so we don't 1407 * fail the entire operation if net_getlifaddr() fails here. 1408 */ 1409 type = NA_BROADCAST; 1410 if (net_getlifaddr(nd, phyif, lif, 1, &type, &bcast) == 0) 1411 ifaddr->ifa_brdaddr = bcast.sin_addr.s_addr; 1412 break; 1413 case AF_INET6: 1414 ifaddr->ifa_ip6addr = ((struct sockaddr_in6 *)&addr)->sin6_addr; 1415 break; 1416 } 1417 1418 /* 1419 * The zoneid stored in ipnetif_t needs to correspond to the actual 1420 * zone the address is being used in. This facilitates finding the 1421 * correct netstack_t pointer, amongst other things, later. 1422 */ 1423 if (zoneid == ALL_ZONES) 1424 zoneid = GLOBAL_ZONEID; 1425 1426 mutex_enter(&ipnetif->if_addr_lock); 1427 if (zoneid != ipnetif->if_zoneid) { 1428 ipnetif_t *ifp2; 1429 1430 ifp2 = ipnetif_clone_create(ipnetif, zoneid); 1431 ifaddr->ifa_shared = ifp2; 1432 } 1433 list_insert_tail(addr.ss_family == AF_INET ? 
1434 &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list, ifaddr); 1435 mutex_exit(&ipnetif->if_addr_lock); 1436 } 1437 1438 static void 1439 ipnet_delete_ifaddr(ipnetif_addr_t *ifaddr, ipnetif_t *ipnetif, boolean_t isv6) 1440 { 1441 mutex_enter(&ipnetif->if_addr_lock); 1442 if (ifaddr->ifa_shared != NULL) 1443 ipnetif_clone_release(ifaddr->ifa_shared); 1444 1445 list_remove(isv6 ? 1446 &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list, ifaddr); 1447 mutex_exit(&ipnetif->if_addr_lock); 1448 kmem_free(ifaddr, sizeof (*ifaddr)); 1449 } 1450 1451 static void 1452 ipnet_plumb_ev(ipnet_nicevent_t *ipne, ipnet_stack_t *ips, boolean_t isv6) 1453 { 1454 ipnetif_t *ipnetif; 1455 boolean_t refrele_needed = B_TRUE; 1456 uint64_t ifflags; 1457 uint64_t ifindex; 1458 char *ifname; 1459 1460 ifflags = 0; 1461 ifname = ipne->ipne_ifname; 1462 ifindex = ipne->ipne_ifindex; 1463 1464 (void) net_getlifflags(ipne->ipne_protocol, ifindex, 0, &ifflags); 1465 1466 if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL) { 1467 ipnetif = ipnetif_create(ifname, ifindex, ips, ifflags); 1468 refrele_needed = B_FALSE; 1469 } 1470 if (ipnetif != NULL) { 1471 ipnetif->if_flags |= 1472 isv6 ? IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED; 1473 } 1474 1475 if (ipnetif->if_multicnt != 0) { 1476 if (ip_join_allmulti(ifindex, isv6, 1477 ips->ips_netstack->netstack_ip) == 0) { 1478 ipnetif->if_flags |= 1479 isv6 ? IPNETIF_IPV6ALLMULTI : IPNETIF_IPV4ALLMULTI; 1480 } 1481 } 1482 1483 if (refrele_needed) 1484 ipnetif_refrele(ipnetif); 1485 } 1486 1487 static void 1488 ipnet_unplumb_ev(uint64_t ifindex, ipnet_stack_t *ips, boolean_t isv6) 1489 { 1490 ipnetif_t *ipnetif; 1491 1492 if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL) 1493 return; 1494 1495 mutex_enter(&ipnetif->if_addr_lock); 1496 ipnet_purge_addrlist(isv6 ? 
1497 &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list); 1498 mutex_exit(&ipnetif->if_addr_lock); 1499 1500 /* 1501 * Note that we have one ipnetif for both IPv4 and IPv6, but we receive 1502 * separate NE_UNPLUMB events for IPv4 and IPv6. We remove the ipnetif 1503 * if both IPv4 and IPv6 interfaces have been unplumbed. 1504 */ 1505 ipnetif->if_flags &= isv6 ? ~IPNETIF_IPV6PLUMBED : ~IPNETIF_IPV4PLUMBED; 1506 if (!(ipnetif->if_flags & (IPNETIF_IPV4PLUMBED | IPNETIF_IPV6PLUMBED))) 1507 ipnetif_remove(ipnetif, ips); 1508 ipnetif_refrele(ipnetif); 1509 } 1510 1511 static void 1512 ipnet_lifup_ev(uint64_t ifindex, uint64_t lifindex, net_handle_t nd, 1513 ipnet_stack_t *ips, boolean_t isv6) 1514 { 1515 ipnetif_t *ipnetif; 1516 ipnetif_addr_t *ifaddr; 1517 1518 if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL) 1519 return; 1520 if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL) { 1521 /* 1522 * We must have missed a NE_LIF_DOWN event. Delete this 1523 * ifaddr and re-create it. 1524 */ 1525 ipnet_delete_ifaddr(ifaddr, ipnetif, isv6); 1526 } 1527 1528 ipnet_add_ifaddr(lifindex, ipnetif, nd); 1529 ipnetif_refrele(ipnetif); 1530 } 1531 1532 static void 1533 ipnet_lifdown_ev(uint64_t ifindex, uint64_t lifindex, ipnet_stack_t *ips, 1534 boolean_t isv6) 1535 { 1536 ipnetif_t *ipnetif; 1537 ipnetif_addr_t *ifaddr; 1538 1539 if ((ipnetif = ipnetif_getby_index(ifindex, ips)) == NULL) 1540 return; 1541 if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL) 1542 ipnet_delete_ifaddr(ifaddr, ipnetif, isv6); 1543 ipnetif_refrele(ipnetif); 1544 /* 1545 * Make sure that open streams on this ipnetif are still allowed to 1546 * have it open. 1547 */ 1548 ipnetif_zonecheck(ipnetif, ips); 1549 } 1550 1551 /* 1552 * This callback from the NIC event framework dispatches a taskq as the event 1553 * handlers may block. 
1554 */ 1555 /* ARGSUSED */ 1556 static int 1557 ipnet_nicevent_cb(hook_event_token_t token, hook_data_t info, void *arg) 1558 { 1559 ipnet_stack_t *ips = arg; 1560 hook_nic_event_t *hn = (hook_nic_event_t *)info; 1561 ipnet_nicevent_t *ipne; 1562 1563 if ((ipne = kmem_alloc(sizeof (ipnet_nicevent_t), KM_NOSLEEP)) == NULL) 1564 return (0); 1565 ipne->ipne_event = hn->hne_event; 1566 ipne->ipne_protocol = hn->hne_protocol; 1567 ipne->ipne_stackid = ips->ips_netstack->netstack_stackid; 1568 ipne->ipne_ifindex = hn->hne_nic; 1569 ipne->ipne_lifindex = hn->hne_lif; 1570 if (hn->hne_datalen != 0) { 1571 (void) strlcpy(ipne->ipne_ifname, hn->hne_data, 1572 sizeof (ipne->ipne_ifname)); 1573 } 1574 (void) ddi_taskq_dispatch(ipnet_nicevent_taskq, ipnet_nicevent_task, 1575 ipne, DDI_NOSLEEP); 1576 return (0); 1577 } 1578 1579 static void 1580 ipnet_nicevent_task(void *arg) 1581 { 1582 ipnet_nicevent_t *ipne = arg; 1583 netstack_t *ns; 1584 ipnet_stack_t *ips; 1585 boolean_t isv6; 1586 1587 if ((ns = netstack_find_by_stackid(ipne->ipne_stackid)) == NULL) 1588 goto done; 1589 ips = ns->netstack_ipnet; 1590 isv6 = (ipne->ipne_protocol == ips->ips_ndv6); 1591 1592 mutex_enter(&ips->ips_event_lock); 1593 switch (ipne->ipne_event) { 1594 case NE_PLUMB: 1595 ipnet_plumb_ev(ipne, ips, isv6); 1596 break; 1597 case NE_UNPLUMB: 1598 ipnet_unplumb_ev(ipne->ipne_ifindex, ips, isv6); 1599 break; 1600 case NE_LIF_UP: 1601 ipnet_lifup_ev(ipne->ipne_ifindex, ipne->ipne_lifindex, 1602 ipne->ipne_protocol, ips, isv6); 1603 break; 1604 case NE_LIF_DOWN: 1605 ipnet_lifdown_ev(ipne->ipne_ifindex, ipne->ipne_lifindex, ips, 1606 isv6); 1607 break; 1608 default: 1609 break; 1610 } 1611 mutex_exit(&ips->ips_event_lock); 1612 done: 1613 if (ns != NULL) 1614 netstack_rele(ns); 1615 kmem_free(ipne, sizeof (ipnet_nicevent_t)); 1616 } 1617 1618 dev_t 1619 ipnet_if_getdev(char *name, zoneid_t zoneid) 1620 { 1621 netstack_t *ns; 1622 ipnet_stack_t *ips; 1623 ipnetif_t *ipnetif; 1624 dev_t dev = (dev_t)-1; 
1625 1626 if (is_system_labeled() && zoneid != GLOBAL_ZONEID) 1627 return (dev); 1628 if ((ns = netstack_find_by_zoneid(zoneid)) == NULL) 1629 return (dev); 1630 1631 ips = ns->netstack_ipnet; 1632 mutex_enter(&ips->ips_avl_lock); 1633 if ((ipnetif = avl_find(&ips->ips_avl_by_name, name, NULL)) != NULL) { 1634 if (ipnetif_in_zone(ipnetif, zoneid, ips)) 1635 dev = ipnetif->if_dev; 1636 } 1637 mutex_exit(&ips->ips_avl_lock); 1638 netstack_rele(ns); 1639 1640 return (dev); 1641 } 1642 1643 static ipnetif_t * 1644 ipnetif_getby_index(uint64_t id, ipnet_stack_t *ips) 1645 { 1646 ipnetif_t *ipnetif; 1647 1648 mutex_enter(&ips->ips_avl_lock); 1649 if ((ipnetif = avl_find(&ips->ips_avl_by_index, &id, NULL)) != NULL) 1650 ipnetif_refhold(ipnetif); 1651 mutex_exit(&ips->ips_avl_lock); 1652 return (ipnetif); 1653 } 1654 1655 static ipnetif_t * 1656 ipnetif_getby_dev(dev_t dev, ipnet_stack_t *ips) 1657 { 1658 ipnetif_t *ipnetif; 1659 avl_tree_t *tree; 1660 1661 mutex_enter(&ips->ips_avl_lock); 1662 tree = &ips->ips_avl_by_index; 1663 for (ipnetif = avl_first(tree); ipnetif != NULL; 1664 ipnetif = avl_walk(tree, ipnetif, AVL_AFTER)) { 1665 if (ipnetif->if_dev == dev) { 1666 ipnetif_refhold(ipnetif); 1667 break; 1668 } 1669 } 1670 mutex_exit(&ips->ips_avl_lock); 1671 return (ipnetif); 1672 } 1673 1674 static ipnetif_addr_t * 1675 ipnet_match_lif(ipnetif_t *ipnetif, lif_if_t lid, boolean_t isv6) 1676 { 1677 ipnetif_addr_t *ifaddr; 1678 list_t *list; 1679 1680 mutex_enter(&ipnetif->if_addr_lock); 1681 list = isv6 ? 
&ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list; 1682 for (ifaddr = list_head(list); ifaddr != NULL; 1683 ifaddr = list_next(list, ifaddr)) { 1684 if (lid == ifaddr->ifa_id) 1685 break; 1686 } 1687 mutex_exit(&ipnetif->if_addr_lock); 1688 return (ifaddr); 1689 } 1690 1691 /* ARGSUSED */ 1692 static void * 1693 ipnet_stack_init(netstackid_t stackid, netstack_t *ns) 1694 { 1695 ipnet_stack_t *ips; 1696 1697 ips = kmem_zalloc(sizeof (*ips), KM_SLEEP); 1698 ips->ips_netstack = ns; 1699 mutex_init(&ips->ips_avl_lock, NULL, MUTEX_DEFAULT, 0); 1700 avl_create(&ips->ips_avl_by_index, ipnetif_compare_index, 1701 sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_index)); 1702 avl_create(&ips->ips_avl_by_name, ipnetif_compare_name, 1703 sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_name)); 1704 avl_create(&ips->ips_avl_by_shared, ipnetif_compare_name_zone, 1705 sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_shared)); 1706 mutex_init(&ips->ips_walkers_lock, NULL, MUTEX_DEFAULT, NULL); 1707 cv_init(&ips->ips_walkers_cv, NULL, CV_DRIVER, NULL); 1708 list_create(&ips->ips_str_list, sizeof (ipnet_t), 1709 offsetof(ipnet_t, ipnet_next)); 1710 ipnet_register_netihook(ips); 1711 return (ips); 1712 } 1713 1714 /* ARGSUSED */ 1715 static void 1716 ipnet_stack_fini(netstackid_t stackid, void *arg) 1717 { 1718 ipnet_stack_t *ips = arg; 1719 ipnetif_t *ipnetif, *nipnetif; 1720 1721 if (ips->ips_kstatp != NULL) { 1722 zoneid_t zoneid; 1723 1724 zoneid = netstackid_to_zoneid(stackid); 1725 net_kstat_delete(net_zoneidtonetid(zoneid), ips->ips_kstatp); 1726 } 1727 if (ips->ips_ndv4 != NULL) { 1728 VERIFY(net_hook_unregister(ips->ips_ndv4, NH_NIC_EVENTS, 1729 ips->ips_nicevents) == 0); 1730 VERIFY(net_protocol_release(ips->ips_ndv4) == 0); 1731 } 1732 if (ips->ips_ndv6 != NULL) { 1733 VERIFY(net_hook_unregister(ips->ips_ndv6, NH_NIC_EVENTS, 1734 ips->ips_nicevents) == 0); 1735 VERIFY(net_protocol_release(ips->ips_ndv6) == 0); 1736 } 1737 hook_free(ips->ips_nicevents); 1738 1739 
for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL; 1740 ipnetif = nipnetif) { 1741 nipnetif = AVL_NEXT(&ips->ips_avl_by_index, ipnetif); 1742 ipnetif_remove(ipnetif, ips); 1743 } 1744 avl_destroy(&ips->ips_avl_by_shared); 1745 avl_destroy(&ips->ips_avl_by_index); 1746 avl_destroy(&ips->ips_avl_by_name); 1747 mutex_destroy(&ips->ips_avl_lock); 1748 mutex_destroy(&ips->ips_walkers_lock); 1749 cv_destroy(&ips->ips_walkers_cv); 1750 list_destroy(&ips->ips_str_list); 1751 kmem_free(ips, sizeof (*ips)); 1752 } 1753 1754 /* Do any of the addresses in addrlist belong the supplied zoneid? */ 1755 static boolean_t 1756 ipnet_addrs_in_zone(list_t *addrlist, zoneid_t zoneid) 1757 { 1758 ipnetif_addr_t *ifa; 1759 1760 for (ifa = list_head(addrlist); ifa != NULL; 1761 ifa = list_next(addrlist, ifa)) { 1762 if (ifa->ifa_zone == zoneid) 1763 return (B_TRUE); 1764 } 1765 return (B_FALSE); 1766 } 1767 1768 /* Should the supplied ipnetif be visible from the supplied zoneid? */ 1769 static boolean_t 1770 ipnetif_in_zone(ipnetif_t *ipnetif, zoneid_t zoneid, ipnet_stack_t *ips) 1771 { 1772 int ret; 1773 1774 /* 1775 * The global zone has visibility into all interfaces in the global 1776 * stack, and exclusive stack zones have visibility into all 1777 * interfaces in their stack. 1778 */ 1779 if (zoneid == GLOBAL_ZONEID || 1780 ips->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID) 1781 return (B_TRUE); 1782 1783 /* 1784 * Shared-stack zones only have visibility for interfaces that have 1785 * addresses in their zone. 1786 */ 1787 mutex_enter(&ipnetif->if_addr_lock); 1788 ret = ipnet_addrs_in_zone(&ipnetif->if_ip4addr_list, zoneid) || 1789 ipnet_addrs_in_zone(&ipnetif->if_ip6addr_list, zoneid); 1790 mutex_exit(&ipnetif->if_addr_lock); 1791 return (ret); 1792 } 1793 1794 /* 1795 * Verify that any ipnet_t that has a reference to the supplied ipnetif should 1796 * still be allowed to have it open. 
A given ipnet_t may no longer be allowed 1797 * to have an ipnetif open if there are no longer any addresses that belong to 1798 * the ipnetif in the ipnet_t's non-global shared-stack zoneid. If that's the 1799 * case, send the ipnet_t an M_HANGUP. 1800 */ 1801 static void 1802 ipnetif_zonecheck(ipnetif_t *ipnetif, ipnet_stack_t *ips) 1803 { 1804 list_t *strlist = &ips->ips_str_list; 1805 ipnet_t *ipnet; 1806 1807 ipnet_walkers_inc(ips); 1808 for (ipnet = list_head(strlist); ipnet != NULL; 1809 ipnet = list_next(strlist, ipnet)) { 1810 if (ipnet->ipnet_if != ipnetif) 1811 continue; 1812 if (!ipnetif_in_zone(ipnetif, ipnet->ipnet_zoneid, ips)) 1813 (void) putnextctl(ipnet->ipnet_rq, M_HANGUP); 1814 } 1815 ipnet_walkers_dec(ips); 1816 } 1817 1818 void 1819 ipnet_walk_if(ipnet_walkfunc_t *cb, void *arg, zoneid_t zoneid) 1820 { 1821 ipnetif_t *ipnetif; 1822 list_t cbdata; 1823 ipnetif_cbdata_t *cbnode; 1824 netstack_t *ns; 1825 ipnet_stack_t *ips; 1826 1827 /* 1828 * On labeled systems, non-global zones shouldn't see anything 1829 * in /dev/ipnet. 
1830 */ 1831 if (is_system_labeled() && zoneid != GLOBAL_ZONEID) 1832 return; 1833 1834 if ((ns = netstack_find_by_zoneid(zoneid)) == NULL) 1835 return; 1836 1837 ips = ns->netstack_ipnet; 1838 list_create(&cbdata, sizeof (ipnetif_cbdata_t), 1839 offsetof(ipnetif_cbdata_t, ic_next)); 1840 1841 mutex_enter(&ips->ips_avl_lock); 1842 for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL; 1843 ipnetif = avl_walk(&ips->ips_avl_by_index, ipnetif, AVL_AFTER)) { 1844 if (!ipnetif_in_zone(ipnetif, zoneid, ips)) 1845 continue; 1846 cbnode = kmem_zalloc(sizeof (ipnetif_cbdata_t), KM_SLEEP); 1847 (void) strlcpy(cbnode->ic_ifname, ipnetif->if_name, LIFNAMSIZ); 1848 cbnode->ic_dev = ipnetif->if_dev; 1849 list_insert_head(&cbdata, cbnode); 1850 } 1851 mutex_exit(&ips->ips_avl_lock); 1852 1853 while ((cbnode = list_head(&cbdata)) != NULL) { 1854 cb(cbnode->ic_ifname, arg, cbnode->ic_dev); 1855 list_remove(&cbdata, cbnode); 1856 kmem_free(cbnode, sizeof (ipnetif_cbdata_t)); 1857 } 1858 list_destroy(&cbdata); 1859 netstack_rele(ns); 1860 } 1861 1862 static int 1863 ipnetif_compare_index(const void *index_ptr, const void *ipnetifp) 1864 { 1865 int64_t index1 = *((int64_t *)index_ptr); 1866 int64_t index2 = (int64_t)((ipnetif_t *)ipnetifp)->if_index; 1867 1868 return (SIGNOF(index2 - index1)); 1869 } 1870 1871 static int 1872 ipnetif_compare_name(const void *name_ptr, const void *ipnetifp) 1873 { 1874 int res; 1875 1876 res = strcmp(((ipnetif_t *)ipnetifp)->if_name, name_ptr); 1877 return (SIGNOF(res)); 1878 } 1879 1880 static int 1881 ipnetif_compare_name_zone(const void *key_ptr, const void *ipnetifp) 1882 { 1883 const uintptr_t *ptr = key_ptr; 1884 const ipnetif_t *ifp; 1885 int res; 1886 1887 ifp = ipnetifp; 1888 res = ifp->if_zoneid - ptr[0]; 1889 if (res != 0) 1890 return (SIGNOF(res)); 1891 res = strcmp(ifp->if_name, (char *)ptr[1]); 1892 return (SIGNOF(res)); 1893 } 1894 1895 static void 1896 ipnetif_refhold(ipnetif_t *ipnetif) 1897 { 1898 
mutex_enter(&ipnetif->if_reflock); 1899 ipnetif->if_refcnt++; 1900 mutex_exit(&ipnetif->if_reflock); 1901 } 1902 1903 static void 1904 ipnetif_refrele(ipnetif_t *ipnetif) 1905 { 1906 mutex_enter(&ipnetif->if_reflock); 1907 ASSERT(ipnetif->if_refcnt > 0); 1908 if (--ipnetif->if_refcnt == 0) 1909 ipnetif_free(ipnetif); 1910 else 1911 mutex_exit(&ipnetif->if_reflock); 1912 } 1913 1914 static void 1915 ipnet_walkers_inc(ipnet_stack_t *ips) 1916 { 1917 mutex_enter(&ips->ips_walkers_lock); 1918 ips->ips_walkers_cnt++; 1919 mutex_exit(&ips->ips_walkers_lock); 1920 } 1921 1922 static void 1923 ipnet_walkers_dec(ipnet_stack_t *ips) 1924 { 1925 mutex_enter(&ips->ips_walkers_lock); 1926 ASSERT(ips->ips_walkers_cnt != 0); 1927 if (--ips->ips_walkers_cnt == 0) 1928 cv_broadcast(&ips->ips_walkers_cv); 1929 mutex_exit(&ips->ips_walkers_lock); 1930 } 1931 1932 /*ARGSUSED*/ 1933 static int 1934 ipobs_bounce_func(hook_event_token_t token, hook_data_t info, void *arg) 1935 { 1936 hook_pkt_observe_t *hdr; 1937 pfv_t func = (pfv_t)arg; 1938 mblk_t *mp; 1939 1940 hdr = (hook_pkt_observe_t *)info; 1941 /* 1942 * Code in ip_input() expects that it is the only one accessing the 1943 * packet. 1944 */ 1945 mp = copymsg(hdr->hpo_pkt); 1946 if (mp == NULL) { 1947 netstack_t *ns = hdr->hpo_ctx; 1948 ipnet_stack_t *ips = ns->netstack_ipnet; 1949 1950 IPSK_BUMP(ips, ik_dispatchDupDrop); 1951 return (0); 1952 } 1953 1954 hdr = (hook_pkt_observe_t *)mp->b_rptr; 1955 hdr->hpo_pkt = mp; 1956 1957 func(mp); 1958 1959 return (0); 1960 } 1961 1962 hook_t * 1963 ipobs_register_hook(netstack_t *ns, pfv_t func) 1964 { 1965 ip_stack_t *ipst = ns->netstack_ip; 1966 char name[32]; 1967 hook_t *hook; 1968 1969 HOOK_INIT(hook, ipobs_bounce_func, "", (void *)func); 1970 VERIFY(hook != NULL); 1971 1972 /* 1973 * To register multiple hooks with he same callback function, 1974 * a unique name is needed. 
1975 */ 1976 (void) snprintf(name, sizeof (name), "ipobserve_%p", (void *)hook); 1977 hook->h_name = strdup(name); 1978 1979 (void) net_hook_register(ipst->ips_ip4_observe_pr, NH_OBSERVE, hook); 1980 (void) net_hook_register(ipst->ips_ip6_observe_pr, NH_OBSERVE, hook); 1981 1982 return (hook); 1983 } 1984 1985 void 1986 ipobs_unregister_hook(netstack_t *ns, hook_t *hook) 1987 { 1988 ip_stack_t *ipst = ns->netstack_ip; 1989 1990 (void) net_hook_unregister(ipst->ips_ip4_observe_pr, NH_OBSERVE, hook); 1991 1992 (void) net_hook_unregister(ipst->ips_ip6_observe_pr, NH_OBSERVE, hook); 1993 1994 strfree(hook->h_name); 1995 1996 hook_free(hook); 1997 } 1998 1999 /* ******************************************************************** */ 2000 /* BPF Functions below */ 2001 /* ******************************************************************** */ 2002 2003 /* 2004 * Convenience function to make mapping a zoneid to an ipnet_stack_t easy. 2005 */ 2006 ipnet_stack_t * 2007 ipnet_find_by_zoneid(zoneid_t zoneid) 2008 { 2009 netstack_t *ns; 2010 2011 VERIFY((ns = netstack_find_by_zoneid(zoneid)) != NULL); 2012 return (ns->netstack_ipnet); 2013 } 2014 2015 /* 2016 * Functions, such as the above ipnet_find_by_zoneid(), will return a 2017 * pointer to ipnet_stack_t by calling a netstack lookup function. 2018 * The netstack_find_*() functions return a pointer after doing a "hold" 2019 * on the data structure and thereby require a "release" when the caller 2020 * is finished with it. We need to mirror that API here and thus a caller 2021 * of ipnet_find_by_zoneid() is required to call ipnet_rele(). 
2022 */ 2023 void 2024 ipnet_rele(ipnet_stack_t *ips) 2025 { 2026 netstack_rele(ips->ips_netstack); 2027 } 2028 2029 /* 2030 */ 2031 void 2032 ipnet_set_itap(bpf_itap_fn_t tapfunc) 2033 { 2034 ipnet_itap = tapfunc; 2035 } 2036 2037 /* 2038 * The list of interfaces available via ipnet is private for each zone, 2039 * so the AVL tree of each zone must be searched for a given name, even 2040 * if all names are unique. 2041 */ 2042 int 2043 ipnet_open_byname(const char *name, ipnetif_t **ptr, zoneid_t zoneid) 2044 { 2045 ipnet_stack_t *ips; 2046 ipnetif_t *ipnetif; 2047 2048 ASSERT(ptr != NULL); 2049 VERIFY((ips = ipnet_find_by_zoneid(zoneid)) != NULL); 2050 2051 mutex_enter(&ips->ips_avl_lock); 2052 2053 /* 2054 * Shared instance zone? 2055 */ 2056 if (netstackid_to_zoneid(zoneid_to_netstackid(zoneid)) != zoneid) { 2057 uintptr_t key[2] = { zoneid, (uintptr_t)name }; 2058 2059 ipnetif = avl_find(&ips->ips_avl_by_shared, (void *)key, NULL); 2060 } else { 2061 ipnetif = avl_find(&ips->ips_avl_by_name, (void *)name, NULL); 2062 } 2063 if (ipnetif != NULL) 2064 ipnetif_refhold(ipnetif); 2065 mutex_exit(&ips->ips_avl_lock); 2066 2067 *ptr = ipnetif; 2068 ipnet_rele(ips); 2069 2070 if (ipnetif == NULL) 2071 return (ESRCH); 2072 return (0); 2073 } 2074 2075 void 2076 ipnet_close_byhandle(ipnetif_t *ifp) 2077 { 2078 ASSERT(ifp != NULL); 2079 ipnetif_refrele(ifp); 2080 } 2081 2082 const char * 2083 ipnet_name(ipnetif_t *ifp) 2084 { 2085 ASSERT(ifp != NULL); 2086 return (ifp->if_name); 2087 } 2088 2089 /* 2090 * To find the linkid for a given name, it is necessary to know which zone 2091 * the interface name belongs to and to search the avl tree for that zone 2092 * as there is no master list of all interfaces and which zone they belong 2093 * to. It is assumed that the caller of this function is somehow already 2094 * working with the ipnet interfaces and hence the ips_event_lock is held. 
2095 * When BPF calls into this function, it is doing so because of an event 2096 * in ipnet, and thus ipnet holds the ips_event_lock. Thus the datalink id 2097 * value returned has meaning without the need for grabbing a hold on the 2098 * owning structure. 2099 */ 2100 int 2101 ipnet_get_linkid_byname(const char *name, uint_t *idp, zoneid_t zoneid) 2102 { 2103 ipnet_stack_t *ips; 2104 ipnetif_t *ifp; 2105 2106 VERIFY((ips = ipnet_find_by_zoneid(zoneid)) != NULL); 2107 ASSERT(mutex_owned(&ips->ips_event_lock)); 2108 2109 mutex_enter(&ips->ips_avl_lock); 2110 ifp = avl_find(&ips->ips_avl_by_name, (void *)name, NULL); 2111 if (ifp != NULL) 2112 *idp = (uint_t)ifp->if_index; 2113 2114 /* 2115 * Shared instance zone? 2116 */ 2117 if (netstackid_to_zoneid(zoneid_to_netstackid(zoneid)) != zoneid) { 2118 uintptr_t key[2] = { zoneid, (uintptr_t)name }; 2119 2120 ifp = avl_find(&ips->ips_avl_by_shared, (void *)key, NULL); 2121 if (ifp != NULL) 2122 *idp = (uint_t)ifp->if_index; 2123 } 2124 2125 mutex_exit(&ips->ips_avl_lock); 2126 ipnet_rele(ips); 2127 2128 if (ifp == NULL) 2129 return (ESRCH); 2130 return (0); 2131 } 2132 2133 /* 2134 * Strictly speaking, there is no such thing as a "client" in ipnet, like 2135 * there is in mac. BPF only needs to have this because it is required as 2136 * part of interfacing correctly with mac. The reuse of the original 2137 * ipnetif_t as a client poses no danger, so long as it is done with its 2138 * own ref-count'd hold that is given up on close. 2139 */ 2140 int 2141 ipnet_client_open(ipnetif_t *ptr, ipnetif_t **result) 2142 { 2143 ASSERT(ptr != NULL); 2144 ASSERT(result != NULL); 2145 ipnetif_refhold(ptr); 2146 *result = ptr; 2147 2148 return (0); 2149 } 2150 2151 void 2152 ipnet_client_close(ipnetif_t *ptr) 2153 { 2154 ASSERT(ptr != NULL); 2155 ipnetif_refrele(ptr); 2156 } 2157 2158 /* 2159 * This is called from BPF when it needs to start receiving packets 2160 * from ipnet. 
2161 * 2162 * The use of the ipnet_t structure here is somewhat lightweight when 2163 * compared to how it is used elsewhere but it already has all of the 2164 * right fields in it, so reuse here doesn't seem out of order. Its 2165 * primary purpose here is to provide the means to store pointers for 2166 * use when ipnet_promisc_remove() needs to be called. 2167 * 2168 * This should never be called for the IPNET_MINOR_LO device as it is 2169 * never created via ipnetif_create. 2170 */ 2171 /*ARGSUSED*/ 2172 int 2173 ipnet_promisc_add(void *handle, uint_t how, void *data, uintptr_t *mhandle, 2174 int flags) 2175 { 2176 ip_stack_t *ipst; 2177 netstack_t *ns; 2178 ipnetif_t *ifp; 2179 ipnet_t *ipnet; 2180 char name[32]; 2181 int error; 2182 2183 ifp = (ipnetif_t *)handle; 2184 ns = netstack_find_by_zoneid(ifp->if_zoneid); 2185 2186 if ((how == DL_PROMISC_PHYS) || (how == DL_PROMISC_MULTI)) { 2187 error = ipnet_join_allmulti(ifp, ns->netstack_ipnet); 2188 if (error != 0) 2189 return (error); 2190 } else { 2191 return (EINVAL); 2192 } 2193 2194 ipnet = kmem_zalloc(sizeof (*ipnet), KM_SLEEP); 2195 ipnet->ipnet_if = ifp; 2196 ipnet->ipnet_ns = ns; 2197 ipnet->ipnet_flags = flags; 2198 2199 if ((ifp->if_flags & IPNETIF_LOOPBACK) != 0) { 2200 ipnet->ipnet_acceptfn = ipnet_loaccept; 2201 } else { 2202 ipnet->ipnet_acceptfn = ipnet_accept; 2203 } 2204 2205 /* 2206 * To register multiple hooks with the same callback function, 2207 * a unique name is needed. 
2208 */ 2209 HOOK_INIT(ipnet->ipnet_hook, ipnet_bpf_bounce, "", ipnet); 2210 (void) snprintf(name, sizeof (name), "ipnet_promisc_%p", 2211 (void *)ipnet->ipnet_hook); 2212 ipnet->ipnet_hook->h_name = strdup(name); 2213 ipnet->ipnet_data = data; 2214 ipnet->ipnet_zoneid = ifp->if_zoneid; 2215 2216 ipst = ns->netstack_ip; 2217 2218 error = net_hook_register(ipst->ips_ip4_observe_pr, NH_OBSERVE, 2219 ipnet->ipnet_hook); 2220 if (error != 0) 2221 goto regfail; 2222 2223 error = net_hook_register(ipst->ips_ip6_observe_pr, NH_OBSERVE, 2224 ipnet->ipnet_hook); 2225 if (error != 0) { 2226 (void) net_hook_unregister(ipst->ips_ip4_observe_pr, 2227 NH_OBSERVE, ipnet->ipnet_hook); 2228 goto regfail; 2229 } 2230 2231 *mhandle = (uintptr_t)ipnet; 2232 netstack_rele(ns); 2233 2234 return (0); 2235 2236 regfail: 2237 cmn_err(CE_WARN, "net_hook_register failed: %d", error); 2238 strfree(ipnet->ipnet_hook->h_name); 2239 hook_free(ipnet->ipnet_hook); 2240 netstack_rele(ns); 2241 return (error); 2242 } 2243 2244 void 2245 ipnet_promisc_remove(void *data) 2246 { 2247 ip_stack_t *ipst; 2248 ipnet_t *ipnet; 2249 hook_t *hook; 2250 2251 ipnet = data; 2252 ipst = ipnet->ipnet_ns->netstack_ip; 2253 hook = ipnet->ipnet_hook; 2254 2255 VERIFY(net_hook_unregister(ipst->ips_ip4_observe_pr, NH_OBSERVE, 2256 hook) == 0); 2257 2258 VERIFY(net_hook_unregister(ipst->ips_ip6_observe_pr, NH_OBSERVE, 2259 hook) == 0); 2260 2261 strfree(hook->h_name); 2262 2263 hook_free(hook); 2264 2265 kmem_free(ipnet, sizeof (*ipnet)); 2266 } 2267 2268 /* 2269 * arg here comes from the ipnet_t allocated in ipnet_promisc_add. 2270 * An important field from that structure is "ipnet_data" that 2271 * contains the "data" pointer passed into ipnet_promisc_add: it needs 2272 * to be passed back to bpf when we call into ipnet_itap. 2273 * 2274 * ipnet_itap is set by ipnet_set_bpfattach, which in turn is called 2275 * from BPF. 
2276 */ 2277 /*ARGSUSED*/ 2278 static int 2279 ipnet_bpf_bounce(hook_event_token_t token, hook_data_t info, void *arg) 2280 { 2281 hook_pkt_observe_t *hdr; 2282 ipnet_addrp_t src; 2283 ipnet_addrp_t dst; 2284 ipnet_stack_t *ips; 2285 ipnet_t *ipnet; 2286 mblk_t *netmp; 2287 mblk_t *mp; 2288 2289 hdr = (hook_pkt_observe_t *)info; 2290 mp = hdr->hpo_pkt; 2291 ipnet = (ipnet_t *)arg; 2292 ips = ((netstack_t *)hdr->hpo_ctx)->netstack_ipnet; 2293 2294 netmp = hdr->hpo_pkt->b_cont; 2295 src.iap_family = hdr->hpo_family; 2296 dst.iap_family = hdr->hpo_family; 2297 2298 if (hdr->hpo_family == AF_INET) { 2299 src.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_src; 2300 dst.iap_addr4 = &((ipha_t *)(netmp->b_rptr))->ipha_dst; 2301 } else { 2302 src.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_src; 2303 dst.iap_addr6 = &((ip6_t *)(netmp->b_rptr))->ip6_dst; 2304 } 2305 2306 if (!(*ipnet->ipnet_acceptfn)(ipnet, hdr, &src, &dst)) { 2307 IPSK_BUMP(ips, ik_acceptFail); 2308 return (0); 2309 } 2310 IPSK_BUMP(ips, ik_acceptOk); 2311 2312 ipnet_itap(ipnet->ipnet_data, mp, 2313 hdr->hpo_htype == IPOBS_HOOK_OUTBOUND, 2314 ntohl(hdr->hpo_pktlen) + MBLKL(mp)); 2315 2316 return (0); 2317 } 2318 2319 /* 2320 * clone'd ipnetif_t's are created when a shared IP instance zone comes 2321 * to life and configures an IP address. The model that BPF uses is that 2322 * each interface must have a unique pointer and each interface must be 2323 * representative of what it can capture. They are limited to one DLT 2324 * per interface and one zone per interface. Thus every interface that 2325 * can be seen in a zone must be announced via an attach to bpf. For 2326 * shared instance zones, this means the ipnet driver needs to detect 2327 * when an address is added to an interface in a zone for the first 2328 * time (and also when the last address is removed.) 
2329 */ 2330 static ipnetif_t * 2331 ipnetif_clone_create(ipnetif_t *ifp, zoneid_t zoneid) 2332 { 2333 uintptr_t key[2] = { zoneid, (uintptr_t)ifp->if_name }; 2334 ipnet_stack_t *ips = ifp->if_stackp; 2335 avl_index_t where = 0; 2336 ipnetif_t *newif; 2337 2338 mutex_enter(&ips->ips_avl_lock); 2339 newif = avl_find(&ips->ips_avl_by_shared, (void *)key, &where); 2340 if (newif != NULL) { 2341 ipnetif_refhold(newif); 2342 newif->if_sharecnt++; 2343 mutex_exit(&ips->ips_avl_lock); 2344 return (newif); 2345 } 2346 2347 newif = ipnet_alloc_if(ips); 2348 if (newif == NULL) { 2349 mutex_exit(&ips->ips_avl_lock); 2350 return (NULL); 2351 } 2352 2353 newif->if_refcnt = 1; 2354 newif->if_sharecnt = 1; 2355 newif->if_zoneid = zoneid; 2356 (void) strlcpy(newif->if_name, ifp->if_name, LIFNAMSIZ); 2357 newif->if_flags = ifp->if_flags & IPNETIF_LOOPBACK; 2358 newif->if_index = ifp->if_index; 2359 2360 avl_insert(&ips->ips_avl_by_shared, newif, where); 2361 mutex_exit(&ips->ips_avl_lock); 2362 2363 return (newif); 2364 } 2365 2366 static void 2367 ipnetif_clone_release(ipnetif_t *ipnetif) 2368 { 2369 boolean_t dofree = B_FALSE; 2370 boolean_t doremove = B_FALSE; 2371 ipnet_stack_t *ips = ipnetif->if_stackp; 2372 2373 mutex_enter(&ipnetif->if_reflock); 2374 ASSERT(ipnetif->if_refcnt > 0); 2375 if (--ipnetif->if_refcnt == 0) 2376 dofree = B_TRUE; 2377 ASSERT(ipnetif->if_sharecnt > 0); 2378 if (--ipnetif->if_sharecnt == 0) 2379 doremove = B_TRUE; 2380 mutex_exit(&ipnetif->if_reflock); 2381 if (doremove) { 2382 mutex_enter(&ips->ips_avl_lock); 2383 avl_remove(&ips->ips_avl_by_shared, ipnetif); 2384 mutex_exit(&ips->ips_avl_lock); 2385 } 2386 if (dofree) { 2387 ASSERT(ipnetif->if_sharecnt == 0); 2388 ipnetif_free(ipnetif); 2389 } 2390 } 2391