1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * The ipnet device defined here provides access to packets at the IP layer. To 29 * provide access to packets at this layer it registers a callback function in 30 * the ip module and when there are open instances of the device ip will pass 31 * packets into the device. Packets from ip are passed on the input, output and 32 * loopback paths. Internally the module returns to ip as soon as possible by 33 * deferring processing using a taskq. 34 * 35 * Management of the devices in /dev/ipnet/ is handled by the devname 36 * filesystem and use of the neti interfaces. This module registers for NIC 37 * events using the neti framework so that when IP interfaces are bought up, 38 * taken down etc. the ipnet module is notified and its view of the interfaces 39 * configured on the system adjusted. On attach, the module gets an initial 40 * view of the system again using the neti framework but as it has already 41 * registered for IP interface events, it is still up-to-date with any changes. 42 */ 43 44 #include <sys/types.h> 45 #include <sys/conf.h> 46 #include <sys/cred.h> 47 #include <sys/stat.h> 48 #include <sys/ddi.h> 49 #include <sys/sunddi.h> 50 #include <sys/modctl.h> 51 #include <sys/dlpi.h> 52 #include <sys/strsun.h> 53 #include <sys/id_space.h> 54 #include <sys/kmem.h> 55 #include <sys/mkdev.h> 56 #include <sys/neti.h> 57 #include <net/if.h> 58 #include <sys/errno.h> 59 #include <sys/list.h> 60 #include <sys/ksynch.h> 61 #include <sys/hook_event.h> 62 #include <sys/stropts.h> 63 #include <sys/sysmacros.h> 64 #include <inet/ip.h> 65 #include <inet/ip_multi.h> 66 #include <inet/ip6.h> 67 #include <inet/ipnet.h> 68 69 static struct module_info ipnet_minfo = { 70 1, /* mi_idnum */ 71 "ipnet", /* mi_idname */ 72 0, /* mi_minpsz */ 73 INFPSZ, /* mi_maxpsz */ 74 2048, /* mi_hiwat */ 75 0 /* mi_lowat */ 76 }; 77 78 /* 79 * List to hold static view of ipnetif_t's on the system. This is needed to 80 * avoid holding the lock protecting the avl tree of ipnetif's over the 81 * callback into the dev filesystem. 82 */ 83 typedef struct ipnetif_cbdata { 84 char ic_ifname[LIFNAMSIZ]; 85 dev_t ic_dev; 86 list_node_t ic_next; 87 } ipnetif_cbdata_t; 88 89 /* 90 * Convenience enumerated type for ipnet_accept(). It describes the 91 * properties of a given ipnet_addrp_t relative to a single ipnet_t 92 * client stream. The values represent whether the address is ... 93 */ 94 typedef enum { 95 IPNETADDR_MYADDR, /* an address on my ipnetif_t. */ 96 IPNETADDR_MBCAST, /* a multicast or broadcast address. */ 97 IPNETADDR_UNKNOWN /* none of the above. */ 98 } ipnet_addrtype_t; 99 100 /* Argument used for the ipnet_nicevent_taskq callback. */ 101 typedef struct ipnet_nicevent_s { 102 nic_event_t ipne_event; 103 net_handle_t ipne_protocol; 104 netstackid_t ipne_stackid; 105 uint64_t ipne_ifindex; 106 uint64_t ipne_lifindex; 107 char ipne_ifname[LIFNAMSIZ]; 108 } ipnet_nicevent_t; 109 110 static dev_info_t *ipnet_dip; 111 static major_t ipnet_major; 112 static ddi_taskq_t *ipnet_taskq; /* taskq for packets */ 113 static ddi_taskq_t *ipnet_nicevent_taskq; /* taskq for NIC events */ 114 static id_space_t *ipnet_minor_space; 115 static const int IPNET_MINOR_LO = 1; /* minor number for /dev/lo0 */ 116 static const int IPNET_MINOR_MIN = 2; /* start of dynamic minors */ 117 static dl_info_ack_t ipnet_infoack = IPNET_INFO_ACK_INIT; 118 static ipnet_acceptfn_t ipnet_accept, ipnet_loaccept; 119 120 static void ipnet_input(mblk_t *); 121 static int ipnet_wput(queue_t *, mblk_t *); 122 static int ipnet_rsrv(queue_t *); 123 static int ipnet_open(queue_t *, dev_t *, int, int, cred_t *); 124 static int ipnet_close(queue_t *); 125 static void ipnet_ioctl(queue_t *, mblk_t *); 126 static void ipnet_iocdata(queue_t *, mblk_t *); 127 static void ipnet_wputnondata(queue_t *, mblk_t *); 128 static int ipnet_attach(dev_info_t *, ddi_attach_cmd_t); 129 static int ipnet_detach(dev_info_t *, ddi_detach_cmd_t); 130 static int ipnet_devinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 131 static void ipnet_inforeq(queue_t *q, mblk_t *mp); 132 static void ipnet_bindreq(queue_t *q, mblk_t *mp); 133 static void ipnet_unbindreq(queue_t *q, mblk_t *mp); 134 static void ipnet_dlpromisconreq(queue_t *q, mblk_t *mp); 135 static void ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp); 136 static int ipnet_join_allmulti(ipnetif_t *, ipnet_stack_t *); 137 static void ipnet_leave_allmulti(ipnetif_t *, ipnet_stack_t *); 138 static int ipnet_nicevent_cb(hook_event_token_t, hook_data_t, void *); 139 static void ipnet_nicevent_task(void *); 140 static ipnetif_t *ipnet_create_if(const char *, uint64_t, ipnet_stack_t *); 141 static void ipnet_remove_if(ipnetif_t *, ipnet_stack_t *); 142 static ipnetif_addr_t *ipnet_match_lif(ipnetif_t *, lif_if_t, boolean_t); 143 static ipnetif_t *ipnet_if_getby_index(uint64_t, ipnet_stack_t *); 144 static ipnetif_t *ipnet_if_getby_dev(dev_t, ipnet_stack_t *); 145 static boolean_t ipnet_if_in_zone(ipnetif_t *, zoneid_t, ipnet_stack_t *); 146 static void ipnet_if_zonecheck(ipnetif_t *, ipnet_stack_t *); 147 static int ipnet_populate_if(net_handle_t, ipnet_stack_t *, boolean_t); 148 static int ipnet_if_compare_name(const void *, const void *); 149 static int ipnet_if_compare_index(const void *, const void *); 150 static void ipnet_add_ifaddr(uint64_t, ipnetif_t *, net_handle_t); 151 static void ipnet_delete_ifaddr(ipnetif_addr_t *, ipnetif_t *, boolean_t); 152 static void ipnetif_refhold(ipnetif_t *); 153 static void ipnetif_refrele(ipnetif_t *); 154 static void ipnet_walkers_inc(ipnet_stack_t *); 155 static void ipnet_walkers_dec(ipnet_stack_t *); 156 static void ipnet_register_netihook(ipnet_stack_t *); 157 static void *ipnet_stack_init(netstackid_t, netstack_t *); 158 static void ipnet_stack_fini(netstackid_t, void *); 159 160 static struct qinit ipnet_rinit = { 161 NULL, /* qi_putp */ 162 ipnet_rsrv, /* qi_srvp */ 163 ipnet_open, /* qi_qopen */ 164 ipnet_close, /* qi_qclose */ 165 NULL, /* qi_qadmin */ 166 &ipnet_minfo, /* qi_minfo */ 167 }; 168 169 static struct qinit ipnet_winit = { 170 ipnet_wput, /* qi_putp */ 171 NULL, /* qi_srvp */ 172 NULL, /* qi_qopen */ 173 NULL, /* qi_qclose */ 174 NULL, /* qi_qadmin */ 175 &ipnet_minfo, /* qi_minfo */ 176 }; 177 178 static struct streamtab ipnet_info = { 179 &ipnet_rinit, &ipnet_winit 180 }; 181 182 DDI_DEFINE_STREAM_OPS(ipnet_ops, nulldev, nulldev, ipnet_attach, 183 ipnet_detach, nodev, ipnet_devinfo, D_MP | D_MTPERMOD, &ipnet_info, 184 ddi_quiesce_not_supported); 185 186 static struct modldrv modldrv = { 187 &mod_driverops, 188 "STREAMS ipnet driver", 189 &ipnet_ops 190 }; 191 192 static struct modlinkage modlinkage = { 193 MODREV_1, &modldrv, NULL 194 }; 195 196 /* 197 * Walk the list of physical interfaces on the machine, for each 198 * interface create a new ipnetif_t and add any addresses to it. We 199 * need to do the walk twice, once for IPv4 and once for IPv6. 200 * 201 * The interfaces are destroyed as part of ipnet_stack_fini() for each 202 * stack. Note that we cannot do this initialization in 203 * ipnet_stack_init(), since ipnet_stack_init() cannot fail. 204 */ 205 static int 206 ipnet_if_init(void) 207 { 208 netstack_handle_t nh; 209 netstack_t *ns; 210 ipnet_stack_t *ips; 211 int ret = 0; 212 213 netstack_next_init(&nh); 214 while ((ns = netstack_next(&nh)) != NULL) { 215 ips = ns->netstack_ipnet; 216 if ((ret = ipnet_populate_if(ips->ips_ndv4, ips, B_FALSE)) == 0) 217 ret = ipnet_populate_if(ips->ips_ndv6, ips, B_TRUE); 218 netstack_rele(ns); 219 if (ret != 0) 220 break; 221 } 222 netstack_next_fini(&nh); 223 return (ret); 224 } 225 226 /* 227 * Standard module entry points. 228 */ 229 int 230 _init(void) 231 { 232 int ret; 233 234 if ((ipnet_major = ddi_name_to_major("ipnet")) == (major_t)-1) 235 return (ENODEV); 236 ipnet_minor_space = id_space_create("ipnet_minor_space", 237 IPNET_MINOR_MIN, MAXMIN32); 238 netstack_register(NS_IPNET, ipnet_stack_init, NULL, ipnet_stack_fini); 239 /* 240 * We call ddi_taskq_create() with nthread == 1 to ensure in-order 241 * delivery of packets to clients. 242 */ 243 ipnet_taskq = ddi_taskq_create(NULL, "ipnet", 1, TASKQ_DEFAULTPRI, 0); 244 ipnet_nicevent_taskq = ddi_taskq_create(NULL, "ipnet_nic_event_queue", 245 1, TASKQ_DEFAULTPRI, 0); 246 if (ipnet_taskq == NULL || ipnet_nicevent_taskq == NULL) { 247 ret = ENOMEM; 248 goto done; 249 } 250 if ((ret = ipnet_if_init()) == 0) 251 ret = mod_install(&modlinkage); 252 done: 253 if (ret != 0) { 254 if (ipnet_taskq != NULL) 255 ddi_taskq_destroy(ipnet_taskq); 256 if (ipnet_nicevent_taskq != NULL) 257 ddi_taskq_destroy(ipnet_nicevent_taskq); 258 netstack_unregister(NS_IPNET); 259 id_space_destroy(ipnet_minor_space); 260 } 261 return (ret); 262 } 263 264 int 265 _fini(void) 266 { 267 int err; 268 269 if ((err = mod_remove(&modlinkage)) != 0) 270 return (err); 271 ddi_taskq_destroy(ipnet_nicevent_taskq); 272 ddi_taskq_destroy(ipnet_taskq); 273 netstack_unregister(NS_IPNET); 274 id_space_destroy(ipnet_minor_space); 275 return (0); 276 } 277 278 int 279 _info(struct modinfo *modinfop) 280 { 281 return (mod_info(&modlinkage, modinfop)); 282 } 283 284 static void 285 ipnet_register_netihook(ipnet_stack_t *ips) 286 { 287 int ret; 288 zoneid_t zoneid; 289 netid_t netid; 290 291 HOOK_INIT(ips->ips_nicevents, ipnet_nicevent_cb, "ipnet_nicevents", 292 ips); 293 294 /* 295 * It is possible for an exclusive stack to be in the process of 296 * shutting down here, and the netid and protocol lookups could fail 297 * in that case. 298 */ 299 zoneid = netstackid_to_zoneid(ips->ips_netstack->netstack_stackid); 300 if ((netid = net_zoneidtonetid(zoneid)) == -1) 301 return; 302 303 if ((ips->ips_ndv4 = net_protocol_lookup(netid, NHF_INET)) != NULL) { 304 if ((ret = net_hook_register(ips->ips_ndv4, NH_NIC_EVENTS, 305 ips->ips_nicevents)) != 0) { 306 VERIFY(net_protocol_release(ips->ips_ndv4) == 0); 307 ips->ips_ndv4 = NULL; 308 cmn_err(CE_WARN, "unable to register IPv4 netinfo hooks" 309 " in zone %d: %d", zoneid, ret); 310 } 311 } 312 if ((ips->ips_ndv6 = net_protocol_lookup(netid, NHF_INET6)) != NULL) { 313 if ((ret = net_hook_register(ips->ips_ndv6, NH_NIC_EVENTS, 314 ips->ips_nicevents)) != 0) { 315 VERIFY(net_protocol_release(ips->ips_ndv6) == 0); 316 ips->ips_ndv6 = NULL; 317 cmn_err(CE_WARN, "unable to register IPv6 netinfo hooks" 318 " in zone %d: %d", zoneid, ret); 319 } 320 } 321 } 322 323 /* 324 * This function is called on attach to build an initial view of the 325 * interfaces on the system. It will be called once for IPv4 and once 326 * for IPv6, although there is only one ipnet interface for both IPv4 327 * and IPv6 there are separate address lists. 328 */ 329 static int 330 ipnet_populate_if(net_handle_t nd, ipnet_stack_t *ips, boolean_t isv6) 331 { 332 phy_if_t phyif; 333 lif_if_t lif; 334 ipnetif_t *ipnetif; 335 char name[LIFNAMSIZ]; 336 boolean_t new_if = B_FALSE; 337 uint64_t ifflags; 338 int ret = 0; 339 340 /* 341 * If ipnet_register_netihook() was unable to initialize this 342 * stack's net_handle_t, then we cannot populate any interface 343 * information. This usually happens when we attempted to 344 * grab a net_handle_t as a stack was shutting down. We don't 345 * want to fail the entire _init() operation because of a 346 * stack shutdown (other stacks will continue to work just 347 * fine), so we silently return success here. 348 */ 349 if (nd == NULL) 350 return (0); 351 352 /* 353 * Make sure we're not processing NIC events during the 354 * population of our interfaces and address lists. 355 */ 356 mutex_enter(&ips->ips_event_lock); 357 358 for (phyif = net_phygetnext(nd, 0); phyif != 0; 359 phyif = net_phygetnext(nd, phyif)) { 360 if (net_getifname(nd, phyif, name, LIFNAMSIZ) != 0) 361 continue; 362 if ((ipnetif = ipnet_if_getby_index(phyif, ips)) == NULL) { 363 ipnetif = ipnet_create_if(name, phyif, ips); 364 if (ipnetif == NULL) { 365 ret = ENOMEM; 366 goto done; 367 } 368 new_if = B_TRUE; 369 } 370 ipnetif->if_flags |= 371 isv6 ? IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED; 372 373 for (lif = net_lifgetnext(nd, phyif, 0); lif != 0; 374 lif = net_lifgetnext(nd, phyif, lif)) { 375 /* 376 * Skip addresses that aren't up. We'll add 377 * them when we receive an NE_LIF_UP event. 378 */ 379 if (net_getlifflags(nd, phyif, lif, &ifflags) != 0 || 380 !(ifflags & IFF_UP)) 381 continue; 382 /* Don't add it if we already have it. */ 383 if (ipnet_match_lif(ipnetif, lif, isv6) != NULL) 384 continue; 385 ipnet_add_ifaddr(lif, ipnetif, nd); 386 } 387 if (!new_if) 388 ipnetif_refrele(ipnetif); 389 } 390 391 done: 392 mutex_exit(&ips->ips_event_lock); 393 return (ret); 394 } 395 396 static int 397 ipnet_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 398 { 399 if (cmd != DDI_ATTACH) 400 return (DDI_FAILURE); 401 402 if (ddi_create_minor_node(dip, "lo0", S_IFCHR, IPNET_MINOR_LO, 403 DDI_PSEUDO, 0) == DDI_FAILURE) 404 return (DDI_FAILURE); 405 406 ipnet_dip = dip; 407 return (DDI_SUCCESS); 408 } 409 410 static int 411 ipnet_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 412 { 413 if (cmd != DDI_DETACH) 414 return (DDI_FAILURE); 415 416 ASSERT(dip == ipnet_dip); 417 ddi_remove_minor_node(ipnet_dip, NULL); 418 ipnet_dip = NULL; 419 return (DDI_SUCCESS); 420 } 421 422 /* ARGSUSED */ 423 static int 424 ipnet_devinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 425 { 426 int error = DDI_FAILURE; 427 428 switch (infocmd) { 429 case DDI_INFO_DEVT2INSTANCE: 430 *result = (void *)0; 431 error = DDI_SUCCESS; 432 break; 433 case DDI_INFO_DEVT2DEVINFO: 434 if (ipnet_dip != NULL) { 435 *result = ipnet_dip; 436 error = DDI_SUCCESS; 437 } 438 break; 439 } 440 return (error); 441 } 442 443 /* ARGSUSED */ 444 static int 445 ipnet_open(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp) 446 { 447 ipnet_t *ipnet; 448 netstack_t *ns = NULL; 449 ipnet_stack_t *ips; 450 int err = 0; 451 zoneid_t zoneid = crgetzoneid(crp); 452 453 /* 454 * If the system is labeled, only the global zone is allowed to open 455 * IP observability nodes. 456 */ 457 if (is_system_labeled() && zoneid != GLOBAL_ZONEID) 458 return (EACCES); 459 460 /* We don't support open as a module */ 461 if (sflag & MODOPEN) 462 return (ENOTSUP); 463 464 /* This driver is self-cloning, we don't support re-open. */ 465 if (rq->q_ptr != NULL) 466 return (EBUSY); 467 468 if ((ipnet = kmem_zalloc(sizeof (*ipnet), KM_NOSLEEP)) == NULL) 469 return (ENOMEM); 470 471 VERIFY((ns = netstack_find_by_cred(crp)) != NULL); 472 ips = ns->netstack_ipnet; 473 474 rq->q_ptr = WR(rq)->q_ptr = ipnet; 475 ipnet->ipnet_rq = rq; 476 ipnet->ipnet_minor = (minor_t)id_alloc(ipnet_minor_space); 477 ipnet->ipnet_zoneid = zoneid; 478 ipnet->ipnet_dlstate = DL_UNBOUND; 479 ipnet->ipnet_sap = 0; 480 ipnet->ipnet_ns = ns; 481 482 /* 483 * We need to hold ips_event_lock here as any NE_LIF_DOWN events need 484 * to be processed after ipnet_if is set and the ipnet_t has been 485 * inserted in the ips_str_list. 486 */ 487 mutex_enter(&ips->ips_event_lock); 488 if (getminor(*dev) == IPNET_MINOR_LO) { 489 ipnet->ipnet_flags |= IPNET_LOMODE; 490 ipnet->ipnet_acceptfn = ipnet_loaccept; 491 } else { 492 ipnet->ipnet_acceptfn = ipnet_accept; 493 ipnet->ipnet_if = ipnet_if_getby_dev(*dev, ips); 494 if (ipnet->ipnet_if == NULL || 495 !ipnet_if_in_zone(ipnet->ipnet_if, zoneid, ips)) { 496 err = ENODEV; 497 goto done; 498 } 499 } 500 501 mutex_enter(&ips->ips_walkers_lock); 502 while (ips->ips_walkers_cnt != 0) 503 cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock); 504 list_insert_head(&ips->ips_str_list, ipnet); 505 *dev = makedevice(getmajor(*dev), ipnet->ipnet_minor); 506 qprocson(rq); 507 508 /* 509 * Only register our callback if we're the first open client; we call 510 * unregister in close() for the last open client. 511 */ 512 if (list_head(&ips->ips_str_list) == list_tail(&ips->ips_str_list)) 513 ipobs_register_hook(ns, ipnet_input); 514 mutex_exit(&ips->ips_walkers_lock); 515 516 done: 517 mutex_exit(&ips->ips_event_lock); 518 if (err != 0) { 519 netstack_rele(ns); 520 id_free(ipnet_minor_space, ipnet->ipnet_minor); 521 if (ipnet->ipnet_if != NULL) 522 ipnetif_refrele(ipnet->ipnet_if); 523 kmem_free(ipnet, sizeof (*ipnet)); 524 } 525 return (err); 526 } 527 528 static int 529 ipnet_close(queue_t *rq) 530 { 531 ipnet_t *ipnet = rq->q_ptr; 532 ipnet_stack_t *ips = ipnet->ipnet_ns->netstack_ipnet; 533 534 if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS) 535 ipnet_leave_allmulti(ipnet->ipnet_if, ips); 536 if (ipnet->ipnet_flags & IPNET_PROMISC_MULTI) 537 ipnet_leave_allmulti(ipnet->ipnet_if, ips); 538 539 mutex_enter(&ips->ips_walkers_lock); 540 while (ips->ips_walkers_cnt != 0) 541 cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock); 542 543 qprocsoff(rq); 544 545 list_remove(&ips->ips_str_list, ipnet); 546 if (ipnet->ipnet_if != NULL) 547 ipnetif_refrele(ipnet->ipnet_if); 548 id_free(ipnet_minor_space, ipnet->ipnet_minor); 549 kmem_free(ipnet, sizeof (*ipnet)); 550 551 if (list_is_empty(&ips->ips_str_list)) 552 ipobs_unregister_hook(ips->ips_netstack, ipnet_input); 553 554 mutex_exit(&ips->ips_walkers_lock); 555 netstack_rele(ips->ips_netstack); 556 return (0); 557 } 558 559 static int 560 ipnet_wput(queue_t *q, mblk_t *mp) 561 { 562 switch (mp->b_datap->db_type) { 563 case M_FLUSH: 564 if (*mp->b_rptr & FLUSHW) { 565 flushq(q, FLUSHDATA); 566 *mp->b_rptr &= ~FLUSHW; 567 } 568 if (*mp->b_rptr & FLUSHR) 569 qreply(q, mp); 570 else 571 freemsg(mp); 572 break; 573 case M_PROTO: 574 case M_PCPROTO: 575 ipnet_wputnondata(q, mp); 576 break; 577 case M_IOCTL: 578 ipnet_ioctl(q, mp); 579 break; 580 case M_IOCDATA: 581 ipnet_iocdata(q, mp); 582 break; 583 default: 584 freemsg(mp); 585 break; 586 } 587 return (0); 588 } 589 590 static int 591 ipnet_rsrv(queue_t *q) 592 { 593 mblk_t *mp; 594 595 while ((mp = getq(q)) != NULL) { 596 ASSERT(DB_TYPE(mp) == M_DATA); 597 if (canputnext(q)) { 598 putnext(q, mp); 599 } else { 600 (void) putbq(q, mp); 601 break; 602 } 603 } 604 return (0); 605 } 606 607 static void 608 ipnet_ioctl(queue_t *q, mblk_t *mp) 609 { 610 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 611 612 switch (iocp->ioc_cmd) { 613 case DLIOCRAW: 614 miocack(q, mp, 0, 0); 615 break; 616 case DLIOCIPNETINFO: 617 if (iocp->ioc_count == TRANSPARENT) { 618 mcopyin(mp, NULL, sizeof (uint_t), NULL); 619 qreply(q, mp); 620 break; 621 } 622 /* Fallthrough, we don't support I_STR with DLIOCIPNETINFO. */ 623 default: 624 miocnak(q, mp, 0, EINVAL); 625 break; 626 } 627 } 628 629 static void 630 ipnet_iocdata(queue_t *q, mblk_t *mp) 631 { 632 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 633 ipnet_t *ipnet = q->q_ptr; 634 635 switch (iocp->ioc_cmd) { 636 case DLIOCIPNETINFO: 637 if (*(int *)mp->b_cont->b_rptr == 1) 638 ipnet->ipnet_flags |= IPNET_INFO; 639 else if (*(int *)mp->b_cont->b_rptr == 0) 640 ipnet->ipnet_flags &= ~IPNET_INFO; 641 else 642 goto iocnak; 643 miocack(q, mp, 0, DL_IPNETINFO_VERSION); 644 break; 645 default: 646 iocnak: 647 miocnak(q, mp, 0, EINVAL); 648 break; 649 } 650 } 651 652 static void 653 ipnet_wputnondata(queue_t *q, mblk_t *mp) 654 { 655 union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr; 656 t_uscalar_t prim = dlp->dl_primitive; 657 658 switch (prim) { 659 case DL_INFO_REQ: 660 ipnet_inforeq(q, mp); 661 break; 662 case DL_UNBIND_REQ: 663 ipnet_unbindreq(q, mp); 664 break; 665 case DL_BIND_REQ: 666 ipnet_bindreq(q, mp); 667 break; 668 case DL_PROMISCON_REQ: 669 ipnet_dlpromisconreq(q, mp); 670 break; 671 case DL_PROMISCOFF_REQ: 672 ipnet_dlpromiscoffreq(q, mp); 673 break; 674 case DL_UNITDATA_REQ: 675 case DL_DETACH_REQ: 676 case DL_PHYS_ADDR_REQ: 677 case DL_SET_PHYS_ADDR_REQ: 678 case DL_ENABMULTI_REQ: 679 case DL_DISABMULTI_REQ: 680 case DL_ATTACH_REQ: 681 dlerrorack(q, mp, prim, DL_UNSUPPORTED, 0); 682 break; 683 default: 684 dlerrorack(q, mp, prim, DL_BADPRIM, 0); 685 break; 686 } 687 } 688 689 static void 690 ipnet_inforeq(queue_t *q, mblk_t *mp) 691 { 692 dl_info_ack_t *dlip; 693 size_t size = sizeof (dl_info_ack_t) + sizeof (ushort_t); 694 695 if (MBLKL(mp) < DL_INFO_REQ_SIZE) { 696 dlerrorack(q, mp, DL_INFO_REQ, DL_BADPRIM, 0); 697 return; 698 } 699 700 if ((mp = mexchange(q, mp, size, M_PCPROTO, DL_INFO_ACK)) == NULL) 701 return; 702 703 dlip = (dl_info_ack_t *)mp->b_rptr; 704 *dlip = ipnet_infoack; 705 qreply(q, mp); 706 } 707 708 static void 709 ipnet_bindreq(queue_t *q, mblk_t *mp) 710 { 711 union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr; 712 int32_t sap; 713 ipnet_t *ipnet = q->q_ptr; 714 715 if (MBLKL(mp) < DL_BIND_REQ_SIZE) { 716 dlerrorack(q, mp, DL_BIND_REQ, DL_BADPRIM, 0); 717 return; 718 } 719 720 sap = dlp->bind_req.dl_sap; 721 if (sap != IPV4_VERSION && sap != IPV6_VERSION && sap != 0) { 722 dlerrorack(q, mp, DL_BIND_REQ, DL_BADSAP, 0); 723 } else { 724 ipnet->ipnet_sap = sap; 725 ipnet->ipnet_dlstate = DL_IDLE; 726 dlbindack(q, mp, sap, 0, 0, 0, 0); 727 } 728 } 729 730 static void 731 ipnet_unbindreq(queue_t *q, mblk_t *mp) 732 { 733 ipnet_t *ipnet = q->q_ptr; 734 735 if (MBLKL(mp) < DL_UNBIND_REQ_SIZE) { 736 dlerrorack(q, mp, DL_UNBIND_REQ, DL_BADPRIM, 0); 737 return; 738 } 739 740 if (ipnet->ipnet_dlstate != DL_IDLE) { 741 dlerrorack(q, mp, DL_UNBIND_REQ, DL_OUTSTATE, 0); 742 } else { 743 ipnet->ipnet_dlstate = DL_UNBOUND; 744 ipnet->ipnet_sap = 0; 745 dlokack(q, mp, DL_UNBIND_REQ); 746 } 747 } 748 749 static void 750 ipnet_dlpromisconreq(queue_t *q, mblk_t *mp) 751 { 752 ipnet_t *ipnet = q->q_ptr; 753 t_uscalar_t level; 754 int err; 755 756 if (MBLKL(mp) < DL_PROMISCON_REQ_SIZE) { 757 dlerrorack(q, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0); 758 return; 759 } 760 761 if (ipnet->ipnet_flags & IPNET_LOMODE) { 762 dlokack(q, mp, DL_PROMISCON_REQ); 763 return; 764 } 765 766 level = ((dl_promiscon_req_t *)mp->b_rptr)->dl_level; 767 if (level == DL_PROMISC_PHYS || level == DL_PROMISC_MULTI) { 768 if ((err = ipnet_join_allmulti(ipnet->ipnet_if, 769 ipnet->ipnet_ns->netstack_ipnet)) != 0) { 770 dlerrorack(q, mp, DL_PROMISCON_REQ, DL_SYSERR, err); 771 return; 772 } 773 } 774 775 switch (level) { 776 case DL_PROMISC_PHYS: 777 ipnet->ipnet_flags |= IPNET_PROMISC_PHYS; 778 break; 779 case DL_PROMISC_SAP: 780 ipnet->ipnet_flags |= IPNET_PROMISC_SAP; 781 break; 782 case DL_PROMISC_MULTI: 783 ipnet->ipnet_flags |= IPNET_PROMISC_MULTI; 784 break; 785 default: 786 dlerrorack(q, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0); 787 return; 788 } 789 790 dlokack(q, mp, DL_PROMISCON_REQ); 791 } 792 793 static void 794 ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp) 795 { 796 ipnet_t *ipnet = q->q_ptr; 797 t_uscalar_t level; 798 uint16_t orig_ipnet_flags = ipnet->ipnet_flags; 799 800 if (MBLKL(mp) < DL_PROMISCOFF_REQ_SIZE) { 801 dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0); 802 return; 803 } 804 805 if (ipnet->ipnet_flags & IPNET_LOMODE) { 806 dlokack(q, mp, DL_PROMISCOFF_REQ); 807 return; 808 } 809 810 level = ((dl_promiscon_req_t *)mp->b_rptr)->dl_level; 811 switch (level) { 812 case DL_PROMISC_PHYS: 813 if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS) 814 ipnet->ipnet_flags &= ~IPNET_PROMISC_PHYS; 815 break; 816 case DL_PROMISC_SAP: 817 if (ipnet->ipnet_flags & IPNET_PROMISC_SAP) 818 ipnet->ipnet_flags &= ~IPNET_PROMISC_SAP; 819 break; 820 case DL_PROMISC_MULTI: 821 if (ipnet->ipnet_flags & IPNET_PROMISC_MULTI) 822 ipnet->ipnet_flags &= ~IPNET_PROMISC_MULTI; 823 break; 824 default: 825 dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0); 826 return; 827 } 828 829 if (orig_ipnet_flags == ipnet->ipnet_flags) { 830 dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_NOTENAB, 0); 831 return; 832 } 833 834 if (level == DL_PROMISC_PHYS || level == DL_PROMISC_MULTI) { 835 ipnet_leave_allmulti(ipnet->ipnet_if, 836 ipnet->ipnet_ns->netstack_ipnet); 837 } 838 839 dlokack(q, mp, DL_PROMISCOFF_REQ); 840 } 841 842 static int 843 ipnet_join_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips) 844 { 845 int err = 0; 846 ip_stack_t *ipst = ips->ips_netstack->netstack_ip; 847 uint64_t index = ipnetif->if_index; 848 849 mutex_enter(&ips->ips_event_lock); 850 if (ipnetif->if_multicnt == 0) { 851 ASSERT((ipnetif->if_flags & 852 (IPNETIF_IPV4ALLMULTI | IPNETIF_IPV6ALLMULTI)) == 0); 853 if (ipnetif->if_flags & IPNETIF_IPV4PLUMBED) { 854 err = ip_join_allmulti(index, B_FALSE, ipst); 855 if (err != 0) 856 goto done; 857 ipnetif->if_flags |= IPNETIF_IPV4ALLMULTI; 858 } 859 if (ipnetif->if_flags & IPNETIF_IPV6PLUMBED) { 860 err = ip_join_allmulti(index, B_TRUE, ipst); 861 if (err != 0 && 862 (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI)) { 863 (void) ip_leave_allmulti(index, B_FALSE, ipst); 864 ipnetif->if_flags &= ~IPNETIF_IPV4ALLMULTI; 865 goto done; 866 } 867 ipnetif->if_flags |= IPNETIF_IPV6ALLMULTI; 868 } 869 } 870 ipnetif->if_multicnt++; 871 872 done: 873 mutex_exit(&ips->ips_event_lock); 874 return (err); 875 } 876 877 static void 878 ipnet_leave_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips) 879 { 880 int err; 881 ip_stack_t *ipst = ips->ips_netstack->netstack_ip; 882 uint64_t index = ipnetif->if_index; 883 884 mutex_enter(&ips->ips_event_lock); 885 ASSERT(ipnetif->if_multicnt != 0); 886 if (--ipnetif->if_multicnt == 0) { 887 if (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI) { 888 err = ip_leave_allmulti(index, B_FALSE, ipst); 889 ASSERT(err == 0 || err == ENODEV); 890 ipnetif->if_flags &= ~IPNETIF_IPV4ALLMULTI; 891 } 892 if (ipnetif->if_flags & IPNETIF_IPV6ALLMULTI) { 893 err = ip_leave_allmulti(index, B_TRUE, ipst); 894 ASSERT(err == 0 || err == ENODEV); 895 ipnetif->if_flags &= ~IPNETIF_IPV6ALLMULTI; 896 } 897 } 898 mutex_exit(&ips->ips_event_lock); 899 } 900 901 static mblk_t * 902 ipnet_addheader(ipobs_hook_data_t *ihd, mblk_t *mp) 903 { 904 mblk_t *dlhdr; 905 dl_ipnetinfo_t *dl; 906 907 if ((dlhdr = allocb(sizeof (dl_ipnetinfo_t), BPRI_HI)) == NULL) { 908 freemsg(mp); 909 return (NULL); 910 } 911 dl = (dl_ipnetinfo_t *)dlhdr->b_rptr; 912 dl->dli_version = DL_IPNETINFO_VERSION; 913 dl->dli_len = htons(sizeof (*dl)); 914 dl->dli_ipver = ihd->ihd_ipver; 915 dl->dli_srczone = BE_64((uint64_t)ihd->ihd_zsrc); 916 dl->dli_dstzone = BE_64((uint64_t)ihd->ihd_zdst); 917 dlhdr->b_wptr += sizeof (*dl); 918 dlhdr->b_cont = mp; 919 920 return (dlhdr); 921 } 922 923 static ipnet_addrtype_t 924 ipnet_get_addrtype(ipnet_t *ipnet, ipnet_addrp_t *addr) 925 { 926 list_t *list; 927 ipnetif_t *ipnetif = ipnet->ipnet_if; 928 ipnetif_addr_t *ifaddr; 929 ipnet_addrtype_t addrtype = IPNETADDR_UNKNOWN; 930 931 /* First check if the address is multicast or limited broadcast. */ 932 switch (addr->iap_family) { 933 case AF_INET: 934 if (CLASSD(*(addr->iap_addr4)) || 935 *(addr->iap_addr4) == INADDR_BROADCAST) 936 return (IPNETADDR_MBCAST); 937 break; 938 case AF_INET6: 939 if (IN6_IS_ADDR_MULTICAST(addr->iap_addr6)) 940 return (IPNETADDR_MBCAST); 941 break; 942 } 943 944 /* 945 * Walk the address list to see if the address belongs to our 946 * interface or is one of our subnet broadcast addresses. 947 */ 948 mutex_enter(&ipnetif->if_addr_lock); 949 list = (addr->iap_family == AF_INET) ? 950 &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list; 951 for (ifaddr = list_head(list); 952 ifaddr != NULL && addrtype == IPNETADDR_UNKNOWN; 953 ifaddr = list_next(list, ifaddr)) { 954 /* 955 * If we're not in the global zone, then only look at 956 * addresses in our zone. 957 */ 958 if (ipnet->ipnet_zoneid != GLOBAL_ZONEID && 959 ipnet->ipnet_zoneid != ifaddr->ifa_zone) 960 continue; 961 switch (addr->iap_family) { 962 case AF_INET: 963 if (ifaddr->ifa_ip4addr != INADDR_ANY && 964 *(addr->iap_addr4) == ifaddr->ifa_ip4addr) 965 addrtype = IPNETADDR_MYADDR; 966 else if (ifaddr->ifa_brdaddr != INADDR_ANY && 967 *(addr->iap_addr4) == ifaddr->ifa_brdaddr) 968 addrtype = IPNETADDR_MBCAST; 969 break; 970 case AF_INET6: 971 if (IN6_ARE_ADDR_EQUAL(addr->iap_addr6, 972 &ifaddr->ifa_ip6addr)) 973 addrtype = IPNETADDR_MYADDR; 974 break; 975 } 976 } 977 mutex_exit(&ipnetif->if_addr_lock); 978 979 return (addrtype); 980 } 981 982 /* 983 * Verify if the packet contained in ihd should be passed up to the 984 * ipnet client stream. 985 */ 986 static boolean_t 987 ipnet_accept(ipnet_t *ipnet, ipobs_hook_data_t *ihd, ipnet_addrp_t *src, 988 ipnet_addrp_t *dst) 989 { 990 uint64_t ifindex = ipnet->ipnet_if->if_index; 991 ipnet_addrtype_t srctype, dsttype; 992 993 srctype = ipnet_get_addrtype(ipnet, src); 994 dsttype = ipnet_get_addrtype(ipnet, dst); 995 996 /* 997 * Do not allow an ipnet stream to see packets that are not from or to 998 * its zone. The exception is when zones are using the shared stack 999 * model. In this case, streams in the global zone have visibility 1000 * into other shared-stack zones, and broadcast and multicast traffic 1001 * is visible by all zones in the stack. 1002 */ 1003 if (ipnet->ipnet_zoneid != GLOBAL_ZONEID && 1004 dsttype != IPNETADDR_MBCAST) { 1005 if (ipnet->ipnet_zoneid != ihd->ihd_zsrc && 1006 ipnet->ipnet_zoneid != ihd->ihd_zdst) 1007 return (B_FALSE); 1008 } 1009 1010 /* 1011 * If DL_PROMISC_SAP isn't enabled, then the bound SAP must match the 1012 * packet's IP version. 1013 */ 1014 if (!(ipnet->ipnet_flags & IPNET_PROMISC_SAP) && 1015 ipnet->ipnet_sap != ihd->ihd_ipver) 1016 return (B_FALSE); 1017 1018 /* If the destination address is ours, then accept the packet. */ 1019 if (dsttype == IPNETADDR_MYADDR) 1020 return (B_TRUE); 1021 1022 /* 1023 * If DL_PROMISC_PHYS is enabled, then we can see all packets that are 1024 * sent or received on the interface we're observing, or packets that 1025 * have our source address (this allows us to see packets we send). 1026 */ 1027 if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS) { 1028 if (ihd->ihd_ifindex == ifindex || srctype == IPNETADDR_MYADDR) 1029 return (B_TRUE); 1030 } 1031 1032 /* 1033 * We accept multicast and broadcast packets transmitted or received 1034 * on the interface we're observing. 1035 */ 1036 if (dsttype == IPNETADDR_MBCAST && ihd->ihd_ifindex == ifindex) 1037 return (B_TRUE); 1038 1039 return (B_FALSE); 1040 } 1041 1042 /* 1043 * Verify if the packet contained in ihd should be passed up to the ipnet 1044 * client stream that's in IPNET_LOMODE. 1045 */ 1046 /* ARGSUSED */ 1047 static boolean_t 1048 ipnet_loaccept(ipnet_t *ipnet, ipobs_hook_data_t *ihd, ipnet_addrp_t *src, 1049 ipnet_addrp_t *dst) 1050 { 1051 if (ihd->ihd_htype != IPOBS_HOOK_LOCAL) 1052 return (B_FALSE); 1053 1054 /* 1055 * An ipnet stream must not see packets that are not from/to its zone. 1056 */ 1057 if (ipnet->ipnet_zoneid != GLOBAL_ZONEID) { 1058 if (ipnet->ipnet_zoneid != ihd->ihd_zsrc && 1059 ipnet->ipnet_zoneid != ihd->ihd_zdst) 1060 return (B_FALSE); 1061 } 1062 1063 return (ipnet->ipnet_sap == 0 || ipnet->ipnet_sap == ihd->ihd_ipver); 1064 } 1065 1066 static void 1067 ipnet_dispatch(void *arg) 1068 { 1069 mblk_t *mp = arg; 1070 ipobs_hook_data_t *ihd = (ipobs_hook_data_t *)mp->b_rptr; 1071 ipnet_t *ipnet; 1072 mblk_t *netmp; 1073 list_t *list; 1074 ipnet_stack_t *ips = ihd->ihd_stack->netstack_ipnet; 1075 ipnet_addrp_t src, dst; 1076 1077 if (ihd->ihd_ipver == IPV4_VERSION) { 1078 src.iap_family = dst.iap_family = AF_INET; 1079 src.iap_addr4 = &((ipha_t *)(ihd->ihd_mp->b_rptr))->ipha_src; 1080 dst.iap_addr4 = &((ipha_t *)(ihd->ihd_mp->b_rptr))->ipha_dst; 1081 } else { 1082 src.iap_family = dst.iap_family = AF_INET6; 1083 src.iap_addr6 = &((ip6_t *)(ihd->ihd_mp->b_rptr))->ip6_src; 1084 dst.iap_addr6 = &((ip6_t *)(ihd->ihd_mp->b_rptr))->ip6_dst; 1085 } 1086 1087 ipnet_walkers_inc(ips); 1088 1089 list = &ips->ips_str_list; 1090 for (ipnet = list_head(list); ipnet != NULL; 1091 ipnet = list_next(list, ipnet)) { 1092 if (!(*ipnet->ipnet_acceptfn)(ipnet, ihd, &src, &dst)) 1093 continue; 1094 1095 if (list_next(list, ipnet) == NULL) { 1096 netmp = ihd->ihd_mp; 1097 ihd->ihd_mp = NULL; 1098 } else { 1099 if ((netmp = dupmsg(ihd->ihd_mp)) == NULL && 1100 (netmp = copymsg(ihd->ihd_mp)) == NULL) { 1101 atomic_inc_64(&ips->ips_drops); 1102 continue; 1103 } 1104 } 1105 1106 if (ipnet->ipnet_flags & IPNET_INFO) { 1107 if ((netmp = ipnet_addheader(ihd, netmp)) == NULL) { 1108 atomic_inc_64(&ips->ips_drops); 1109 continue; 1110 } 1111 } 1112 1113 if (ipnet->ipnet_rq->q_first == NULL && 1114 canputnext(ipnet->ipnet_rq)) { 1115 putnext(ipnet->ipnet_rq, netmp); 1116 } else if (canput(ipnet->ipnet_rq)) { 1117 (void) putq(ipnet->ipnet_rq, netmp); 1118 } else { 1119 freemsg(netmp); 1120 atomic_inc_64(&ips->ips_drops); 1121 } 1122 } 1123 1124 ipnet_walkers_dec(ips); 1125 1126 freemsg(ihd->ihd_mp); 1127 freemsg(mp); 1128 } 1129 1130 static void 1131 ipnet_input(mblk_t *mp) 1132 { 1133 ipobs_hook_data_t *ihd = (ipobs_hook_data_t *)mp->b_rptr; 1134 1135 if (ddi_taskq_dispatch(ipnet_taskq, ipnet_dispatch, mp, DDI_NOSLEEP) != 1136 DDI_SUCCESS) { 1137 atomic_inc_64(&ihd->ihd_stack->netstack_ipnet->ips_drops); 1138 freemsg(ihd->ihd_mp); 1139 freemsg(mp); 1140 } 1141 } 1142 1143 /* 1144 * Create a new ipnetif_t and new minor node for it. If creation is 1145 * successful the new ipnetif_t is inserted into an avl_tree 1146 * containing ipnetif's for this stack instance. 1147 */ 1148 static ipnetif_t * 1149 ipnet_create_if(const char *name, uint64_t index, ipnet_stack_t *ips) 1150 { 1151 ipnetif_t *ipnetif; 1152 avl_index_t where = 0; 1153 minor_t ifminor; 1154 1155 /* 1156 * Because ipnet_create_if() can be called from a NIC event 1157 * callback, it should not block. 1158 */ 1159 ifminor = (minor_t)id_alloc_nosleep(ipnet_minor_space); 1160 if (ifminor == (minor_t)-1) 1161 return (NULL); 1162 if ((ipnetif = kmem_zalloc(sizeof (*ipnetif), KM_NOSLEEP)) == NULL) { 1163 id_free(ipnet_minor_space, ifminor); 1164 return (NULL); 1165 } 1166 1167 (void) strlcpy(ipnetif->if_name, name, LIFNAMSIZ); 1168 ipnetif->if_index = index; 1169 1170 mutex_init(&ipnetif->if_addr_lock, NULL, MUTEX_DEFAULT, 0); 1171 list_create(&ipnetif->if_ip4addr_list, sizeof (ipnetif_addr_t), 1172 offsetof(ipnetif_addr_t, ifa_link)); 1173 list_create(&ipnetif->if_ip6addr_list, sizeof (ipnetif_addr_t), 1174 offsetof(ipnetif_addr_t, ifa_link)); 1175 ipnetif->if_dev = makedevice(ipnet_major, ifminor); 1176 mutex_init(&ipnetif->if_reflock, NULL, MUTEX_DEFAULT, 0); 1177 ipnetif->if_refcnt = 1; 1178 1179 mutex_enter(&ips->ips_avl_lock); 1180 VERIFY(avl_find(&ips->ips_avl_by_index, &index, &where) == NULL); 1181 avl_insert(&ips->ips_avl_by_index, ipnetif, where); 1182 VERIFY(avl_find(&ips->ips_avl_by_name, (void *)name, &where) == NULL); 1183 avl_insert(&ips->ips_avl_by_name, ipnetif, where); 1184 mutex_exit(&ips->ips_avl_lock); 1185 1186 return (ipnetif); 1187 } 1188 1189 static void 1190 ipnet_remove_if(ipnetif_t *ipnetif, ipnet_stack_t *ips) 1191 { 1192 ipnet_t *ipnet; 1193 1194 ipnet_walkers_inc(ips); 1195 /* Send a SIGHUP to all open streams associated with this ipnetif. */ 1196 for (ipnet = list_head(&ips->ips_str_list); ipnet != NULL; 1197 ipnet = list_next(&ips->ips_str_list, ipnet)) { 1198 if (ipnet->ipnet_if == ipnetif) 1199 (void) putnextctl(ipnet->ipnet_rq, M_HANGUP); 1200 } 1201 ipnet_walkers_dec(ips); 1202 mutex_enter(&ips->ips_avl_lock); 1203 avl_remove(&ips->ips_avl_by_index, ipnetif); 1204 avl_remove(&ips->ips_avl_by_name, ipnetif); 1205 mutex_exit(&ips->ips_avl_lock); 1206 /* Release the reference we implicitly held in ipnet_create_if(). */ 1207 ipnetif_refrele(ipnetif); 1208 } 1209 1210 static void 1211 ipnet_purge_addrlist(list_t *addrlist) 1212 { 1213 ipnetif_addr_t *ifa; 1214 1215 while ((ifa = list_head(addrlist)) != NULL) { 1216 list_remove(addrlist, ifa); 1217 kmem_free(ifa, sizeof (*ifa)); 1218 } 1219 } 1220 1221 static void 1222 ipnet_free_if(ipnetif_t *ipnetif) 1223 { 1224 ASSERT(ipnetif->if_refcnt == 0); 1225 1226 /* Remove IPv4/v6 address lists from the ipnetif */ 1227 ipnet_purge_addrlist(&ipnetif->if_ip4addr_list); 1228 list_destroy(&ipnetif->if_ip4addr_list); 1229 ipnet_purge_addrlist(&ipnetif->if_ip6addr_list); 1230 list_destroy(&ipnetif->if_ip6addr_list); 1231 mutex_destroy(&ipnetif->if_addr_lock); 1232 mutex_destroy(&ipnetif->if_reflock); 1233 id_free(ipnet_minor_space, getminor(ipnetif->if_dev)); 1234 kmem_free(ipnetif, sizeof (*ipnetif)); 1235 } 1236 1237 /* 1238 * Create an ipnetif_addr_t with the given logical interface id (lif) 1239 * and add it to the supplied ipnetif. The lif is the netinfo 1240 * representation of logical interface id, and we use this id to match 1241 * incoming netinfo events against our lists of addresses. 1242 */ 1243 static void 1244 ipnet_add_ifaddr(uint64_t lif, ipnetif_t *ipnetif, net_handle_t nd) 1245 { 1246 ipnetif_addr_t *ifaddr; 1247 zoneid_t zoneid; 1248 struct sockaddr_in bcast; 1249 struct sockaddr_storage addr; 1250 net_ifaddr_t type = NA_ADDRESS; 1251 uint64_t phyif = ipnetif->if_index; 1252 1253 if (net_getlifaddr(nd, phyif, lif, 1, &type, &addr) != 0 || 1254 net_getlifzone(nd, phyif, lif, &zoneid) != 0) 1255 return; 1256 if ((ifaddr = kmem_alloc(sizeof (*ifaddr), KM_NOSLEEP)) == NULL) 1257 return; 1258 1259 ifaddr->ifa_zone = zoneid; 1260 ifaddr->ifa_id = lif; 1261 1262 switch (addr.ss_family) { 1263 case AF_INET: 1264 ifaddr->ifa_ip4addr = 1265 ((struct sockaddr_in *)&addr)->sin_addr.s_addr; 1266 /* 1267 * Try and get the broadcast address. Note that it's okay for 1268 * an interface to not have a broadcast address, so we don't 1269 * fail the entire operation if net_getlifaddr() fails here. 1270 */ 1271 type = NA_BROADCAST; 1272 if (net_getlifaddr(nd, phyif, lif, 1, &type, &bcast) == 0) 1273 ifaddr->ifa_brdaddr = bcast.sin_addr.s_addr; 1274 break; 1275 case AF_INET6: 1276 ifaddr->ifa_ip6addr = ((struct sockaddr_in6 *)&addr)->sin6_addr; 1277 break; 1278 } 1279 1280 mutex_enter(&ipnetif->if_addr_lock); 1281 list_insert_tail(addr.ss_family == AF_INET ? 1282 &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list, ifaddr); 1283 mutex_exit(&ipnetif->if_addr_lock); 1284 } 1285 1286 static void 1287 ipnet_delete_ifaddr(ipnetif_addr_t *ifaddr, ipnetif_t *ipnetif, boolean_t isv6) 1288 { 1289 mutex_enter(&ipnetif->if_addr_lock); 1290 list_remove(isv6 ? 1291 &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list, ifaddr); 1292 mutex_exit(&ipnetif->if_addr_lock); 1293 kmem_free(ifaddr, sizeof (*ifaddr)); 1294 } 1295 1296 static void 1297 ipnet_plumb_ev(uint64_t ifindex, const char *ifname, ipnet_stack_t *ips, 1298 boolean_t isv6) 1299 { 1300 ipnetif_t *ipnetif; 1301 boolean_t refrele_needed = B_TRUE; 1302 1303 if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL) { 1304 ipnetif = ipnet_create_if(ifname, ifindex, ips); 1305 refrele_needed = B_FALSE; 1306 } 1307 if (ipnetif != NULL) { 1308 ipnetif->if_flags |= 1309 isv6 ? IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED; 1310 } 1311 1312 if (ipnetif->if_multicnt != 0) { 1313 if (ip_join_allmulti(ifindex, isv6, 1314 ips->ips_netstack->netstack_ip) == 0) { 1315 ipnetif->if_flags |= 1316 isv6 ? IPNETIF_IPV6ALLMULTI : IPNETIF_IPV4ALLMULTI; 1317 } 1318 } 1319 1320 if (refrele_needed) 1321 ipnetif_refrele(ipnetif); 1322 } 1323 1324 static void 1325 ipnet_unplumb_ev(uint64_t ifindex, ipnet_stack_t *ips, boolean_t isv6) 1326 { 1327 ipnetif_t *ipnetif; 1328 1329 if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL) 1330 return; 1331 1332 mutex_enter(&ipnetif->if_addr_lock); 1333 ipnet_purge_addrlist(isv6 ? 1334 &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list); 1335 mutex_exit(&ipnetif->if_addr_lock); 1336 1337 /* 1338 * Note that we have one ipnetif for both IPv4 and IPv6, but we receive 1339 * separate NE_UNPLUMB events for IPv4 and IPv6. We remove the ipnetif 1340 * if both IPv4 and IPv6 interfaces have been unplumbed. 1341 */ 1342 ipnetif->if_flags &= isv6 ? ~IPNETIF_IPV6PLUMBED : ~IPNETIF_IPV4PLUMBED; 1343 if (!(ipnetif->if_flags & (IPNETIF_IPV4PLUMBED | IPNETIF_IPV6PLUMBED))) 1344 ipnet_remove_if(ipnetif, ips); 1345 ipnetif_refrele(ipnetif); 1346 } 1347 1348 static void 1349 ipnet_lifup_ev(uint64_t ifindex, uint64_t lifindex, net_handle_t nd, 1350 ipnet_stack_t *ips, boolean_t isv6) 1351 { 1352 ipnetif_t *ipnetif; 1353 ipnetif_addr_t *ifaddr; 1354 1355 if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL) 1356 return; 1357 if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL) { 1358 /* 1359 * We must have missed a NE_LIF_DOWN event. Delete this 1360 * ifaddr and re-create it. 1361 */ 1362 ipnet_delete_ifaddr(ifaddr, ipnetif, isv6); 1363 } 1364 1365 ipnet_add_ifaddr(lifindex, ipnetif, nd); 1366 ipnetif_refrele(ipnetif); 1367 } 1368 1369 static void 1370 ipnet_lifdown_ev(uint64_t ifindex, uint64_t lifindex, ipnet_stack_t *ips, 1371 boolean_t isv6) 1372 { 1373 ipnetif_t *ipnetif; 1374 ipnetif_addr_t *ifaddr; 1375 1376 if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL) 1377 return; 1378 if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL) 1379 ipnet_delete_ifaddr(ifaddr, ipnetif, isv6); 1380 ipnetif_refrele(ipnetif); 1381 /* 1382 * Make sure that open streams on this ipnetif are still allowed to 1383 * have it open. 1384 */ 1385 ipnet_if_zonecheck(ipnetif, ips); 1386 } 1387 1388 /* 1389 * This callback from the NIC event framework dispatches a taskq as the event 1390 * handlers may block. 1391 */ 1392 /* ARGSUSED */ 1393 static int 1394 ipnet_nicevent_cb(hook_event_token_t token, hook_data_t info, void *arg) 1395 { 1396 ipnet_stack_t *ips = arg; 1397 hook_nic_event_t *hn = (hook_nic_event_t *)info; 1398 ipnet_nicevent_t *ipne; 1399 1400 if ((ipne = kmem_alloc(sizeof (ipnet_nicevent_t), KM_NOSLEEP)) == NULL) 1401 return (0); 1402 ipne->ipne_event = hn->hne_event; 1403 ipne->ipne_protocol = hn->hne_protocol; 1404 ipne->ipne_stackid = ips->ips_netstack->netstack_stackid; 1405 ipne->ipne_ifindex = hn->hne_nic; 1406 ipne->ipne_lifindex = hn->hne_lif; 1407 if (hn->hne_datalen != 0) { 1408 (void) strlcpy(ipne->ipne_ifname, hn->hne_data, 1409 sizeof (ipne->ipne_ifname)); 1410 } 1411 (void) ddi_taskq_dispatch(ipnet_nicevent_taskq, ipnet_nicevent_task, 1412 ipne, DDI_NOSLEEP); 1413 return (0); 1414 } 1415 1416 static void 1417 ipnet_nicevent_task(void *arg) 1418 { 1419 ipnet_nicevent_t *ipne = arg; 1420 netstack_t *ns; 1421 ipnet_stack_t *ips; 1422 boolean_t isv6; 1423 1424 if ((ns = netstack_find_by_stackid(ipne->ipne_stackid)) == NULL) 1425 goto done; 1426 ips = ns->netstack_ipnet; 1427 isv6 = (ipne->ipne_protocol == ips->ips_ndv6); 1428 1429 mutex_enter(&ips->ips_event_lock); 1430 switch (ipne->ipne_event) { 1431 case NE_PLUMB: 1432 ipnet_plumb_ev(ipne->ipne_ifindex, ipne->ipne_ifname, ips, 1433 isv6); 1434 break; 1435 case NE_UNPLUMB: 1436 ipnet_unplumb_ev(ipne->ipne_ifindex, ips, isv6); 1437 break; 1438 case NE_LIF_UP: 1439 ipnet_lifup_ev(ipne->ipne_ifindex, ipne->ipne_lifindex, 1440 ipne->ipne_protocol, ips, isv6); 1441 break; 1442 case NE_LIF_DOWN: 1443 ipnet_lifdown_ev(ipne->ipne_ifindex, ipne->ipne_lifindex, ips, 1444 isv6); 1445 break; 1446 default: 1447 break; 1448 } 1449 mutex_exit(&ips->ips_event_lock); 1450 done: 1451 if (ns != NULL) 1452 netstack_rele(ns); 1453 kmem_free(ipne, sizeof (ipnet_nicevent_t)); 1454 } 1455 1456 dev_t 1457 ipnet_if_getdev(char *name, zoneid_t zoneid) 1458 { 1459 netstack_t *ns; 1460 ipnet_stack_t *ips; 1461 ipnetif_t *ipnetif; 1462 dev_t dev = (dev_t)-1; 1463 1464 if (is_system_labeled() && zoneid != GLOBAL_ZONEID) 1465 return (dev); 1466 if ((ns = netstack_find_by_zoneid(zoneid)) == NULL) 1467 return (dev); 1468 1469 ips = ns->netstack_ipnet; 1470 mutex_enter(&ips->ips_avl_lock); 1471 if ((ipnetif = avl_find(&ips->ips_avl_by_name, name, NULL)) != NULL) { 1472 if (ipnet_if_in_zone(ipnetif, zoneid, ips)) 1473 dev = ipnetif->if_dev; 1474 } 1475 mutex_exit(&ips->ips_avl_lock); 1476 netstack_rele(ns); 1477 1478 return (dev); 1479 } 1480 1481 static ipnetif_t * 1482 ipnet_if_getby_index(uint64_t id, ipnet_stack_t *ips) 1483 { 1484 ipnetif_t *ipnetif; 1485 1486 mutex_enter(&ips->ips_avl_lock); 1487 if ((ipnetif = avl_find(&ips->ips_avl_by_index, &id, NULL)) != NULL) 1488 ipnetif_refhold(ipnetif); 1489 mutex_exit(&ips->ips_avl_lock); 1490 return (ipnetif); 1491 } 1492 1493 static ipnetif_t * 1494 ipnet_if_getby_dev(dev_t dev, ipnet_stack_t *ips) 1495 { 1496 ipnetif_t *ipnetif; 1497 avl_tree_t *tree; 1498 1499 mutex_enter(&ips->ips_avl_lock); 1500 tree = &ips->ips_avl_by_index; 1501 for (ipnetif = avl_first(tree); ipnetif != NULL; 1502 ipnetif = avl_walk(tree, ipnetif, AVL_AFTER)) { 1503 if (ipnetif->if_dev == dev) { 1504 ipnetif_refhold(ipnetif); 1505 break; 1506 } 1507 } 1508 mutex_exit(&ips->ips_avl_lock); 1509 return (ipnetif); 1510 } 1511 1512 static ipnetif_addr_t * 1513 ipnet_match_lif(ipnetif_t *ipnetif, lif_if_t lid, boolean_t isv6) 1514 { 1515 ipnetif_addr_t *ifaddr; 1516 list_t *list; 1517 1518 mutex_enter(&ipnetif->if_addr_lock); 1519 list = isv6 ? &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list; 1520 for (ifaddr = list_head(list); ifaddr != NULL; 1521 ifaddr = list_next(list, ifaddr)) { 1522 if (lid == ifaddr->ifa_id) 1523 break; 1524 } 1525 mutex_exit(&ipnetif->if_addr_lock); 1526 return (ifaddr); 1527 } 1528 1529 /* ARGSUSED */ 1530 static void * 1531 ipnet_stack_init(netstackid_t stackid, netstack_t *ns) 1532 { 1533 ipnet_stack_t *ips; 1534 1535 ips = kmem_zalloc(sizeof (*ips), KM_SLEEP); 1536 ips->ips_netstack = ns; 1537 mutex_init(&ips->ips_avl_lock, NULL, MUTEX_DEFAULT, 0); 1538 avl_create(&ips->ips_avl_by_index, ipnet_if_compare_index, 1539 sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_index)); 1540 avl_create(&ips->ips_avl_by_name, ipnet_if_compare_name, 1541 sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_name)); 1542 mutex_init(&ips->ips_walkers_lock, NULL, MUTEX_DEFAULT, NULL); 1543 cv_init(&ips->ips_walkers_cv, NULL, CV_DRIVER, NULL); 1544 list_create(&ips->ips_str_list, sizeof (ipnet_t), 1545 offsetof(ipnet_t, ipnet_next)); 1546 ipnet_register_netihook(ips); 1547 return (ips); 1548 } 1549 1550 /* ARGSUSED */ 1551 static void 1552 ipnet_stack_fini(netstackid_t stackid, void *arg) 1553 { 1554 ipnet_stack_t *ips = arg; 1555 ipnetif_t *ipnetif, *nipnetif; 1556 1557 if (ips->ips_ndv4 != NULL) { 1558 VERIFY(net_hook_unregister(ips->ips_ndv4, NH_NIC_EVENTS, 1559 ips->ips_nicevents) == 0); 1560 VERIFY(net_protocol_release(ips->ips_ndv4) == 0); 1561 } 1562 if (ips->ips_ndv6 != NULL) { 1563 VERIFY(net_hook_unregister(ips->ips_ndv6, NH_NIC_EVENTS, 1564 ips->ips_nicevents) == 0); 1565 VERIFY(net_protocol_release(ips->ips_ndv6) == 0); 1566 } 1567 hook_free(ips->ips_nicevents); 1568 1569 for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL; 1570 ipnetif = nipnetif) { 1571 nipnetif = AVL_NEXT(&ips->ips_avl_by_index, ipnetif); 1572 ipnet_remove_if(ipnetif, ips); 1573 } 1574 avl_destroy(&ips->ips_avl_by_index); 1575 avl_destroy(&ips->ips_avl_by_name); 1576 mutex_destroy(&ips->ips_avl_lock); 1577 mutex_destroy(&ips->ips_walkers_lock); 1578 cv_destroy(&ips->ips_walkers_cv); 1579 list_destroy(&ips->ips_str_list); 1580 kmem_free(ips, sizeof (*ips)); 1581 } 1582 1583 /* Do any of the addresses in addrlist belong the supplied zoneid? */ 1584 static boolean_t 1585 ipnet_addrs_in_zone(list_t *addrlist, zoneid_t zoneid) 1586 { 1587 ipnetif_addr_t *ifa; 1588 1589 for (ifa = list_head(addrlist); ifa != NULL; 1590 ifa = list_next(addrlist, ifa)) { 1591 if (ifa->ifa_zone == zoneid) 1592 return (B_TRUE); 1593 } 1594 return (B_FALSE); 1595 } 1596 1597 /* Should the supplied ipnetif be visible from the supplied zoneid? */ 1598 static boolean_t 1599 ipnet_if_in_zone(ipnetif_t *ipnetif, zoneid_t zoneid, ipnet_stack_t *ips) 1600 { 1601 int ret; 1602 1603 /* 1604 * The global zone has visibility into all interfaces in the global 1605 * stack, and exclusive stack zones have visibility into all 1606 * interfaces in their stack. 1607 */ 1608 if (zoneid == GLOBAL_ZONEID || 1609 ips->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID) 1610 return (B_TRUE); 1611 1612 /* 1613 * Shared-stack zones only have visibility for interfaces that have 1614 * addresses in their zone. 1615 */ 1616 mutex_enter(&ipnetif->if_addr_lock); 1617 ret = ipnet_addrs_in_zone(&ipnetif->if_ip4addr_list, zoneid) || 1618 ipnet_addrs_in_zone(&ipnetif->if_ip6addr_list, zoneid); 1619 mutex_exit(&ipnetif->if_addr_lock); 1620 return (ret); 1621 } 1622 1623 /* 1624 * Verify that any ipnet_t that has a reference to the supplied ipnetif should 1625 * still be allowed to have it open. A given ipnet_t may no longer be allowed 1626 * to have an ipnetif open if there are no longer any addresses that belong to 1627 * the ipnetif in the ipnet_t's non-global shared-stack zoneid. If that's the 1628 * case, send the ipnet_t an M_HANGUP. 1629 */ 1630 static void 1631 ipnet_if_zonecheck(ipnetif_t *ipnetif, ipnet_stack_t *ips) 1632 { 1633 list_t *strlist = &ips->ips_str_list; 1634 ipnet_t *ipnet; 1635 1636 ipnet_walkers_inc(ips); 1637 for (ipnet = list_head(strlist); ipnet != NULL; 1638 ipnet = list_next(strlist, ipnet)) { 1639 if (ipnet->ipnet_if != ipnetif) 1640 continue; 1641 if (!ipnet_if_in_zone(ipnetif, ipnet->ipnet_zoneid, ips)) 1642 (void) putnextctl(ipnet->ipnet_rq, M_HANGUP); 1643 } 1644 ipnet_walkers_dec(ips); 1645 } 1646 1647 void 1648 ipnet_walk_if(ipnet_walkfunc_t *cb, void *arg, zoneid_t zoneid) 1649 { 1650 ipnetif_t *ipnetif; 1651 list_t cbdata; 1652 ipnetif_cbdata_t *cbnode; 1653 netstack_t *ns; 1654 ipnet_stack_t *ips; 1655 1656 /* 1657 * On labeled systems, non-global zones shouldn't see anything 1658 * in /dev/ipnet. 1659 */ 1660 if (is_system_labeled() && zoneid != GLOBAL_ZONEID) 1661 return; 1662 1663 if ((ns = netstack_find_by_zoneid(zoneid)) == NULL) 1664 return; 1665 1666 ips = ns->netstack_ipnet; 1667 list_create(&cbdata, sizeof (ipnetif_cbdata_t), 1668 offsetof(ipnetif_cbdata_t, ic_next)); 1669 1670 mutex_enter(&ips->ips_avl_lock); 1671 for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL; 1672 ipnetif = avl_walk(&ips->ips_avl_by_index, ipnetif, AVL_AFTER)) { 1673 if (!ipnet_if_in_zone(ipnetif, zoneid, ips)) 1674 continue; 1675 cbnode = kmem_zalloc(sizeof (ipnetif_cbdata_t), KM_SLEEP); 1676 (void) strlcpy(cbnode->ic_ifname, ipnetif->if_name, LIFNAMSIZ); 1677 cbnode->ic_dev = ipnetif->if_dev; 1678 list_insert_head(&cbdata, cbnode); 1679 } 1680 mutex_exit(&ips->ips_avl_lock); 1681 1682 while ((cbnode = list_head(&cbdata)) != NULL) { 1683 cb(cbnode->ic_ifname, arg, cbnode->ic_dev); 1684 list_remove(&cbdata, cbnode); 1685 kmem_free(cbnode, sizeof (ipnetif_cbdata_t)); 1686 } 1687 list_destroy(&cbdata); 1688 netstack_rele(ns); 1689 } 1690 1691 static int 1692 ipnet_if_compare_index(const void *index_ptr, const void *ipnetifp) 1693 { 1694 int64_t index1 = *((int64_t *)index_ptr); 1695 int64_t index2 = (int64_t)((ipnetif_t *)ipnetifp)->if_index; 1696 1697 return (SIGNOF(index2 - index1)); 1698 } 1699 1700 static int 1701 ipnet_if_compare_name(const void *name_ptr, const void *ipnetifp) 1702 { 1703 int res; 1704 1705 res = strcmp(((ipnetif_t *)ipnetifp)->if_name, name_ptr); 1706 return (SIGNOF(res)); 1707 } 1708 1709 static void 1710 ipnetif_refhold(ipnetif_t *ipnetif) 1711 { 1712 mutex_enter(&ipnetif->if_reflock); 1713 ipnetif->if_refcnt++; 1714 mutex_exit(&ipnetif->if_reflock); 1715 } 1716 1717 static void 1718 ipnetif_refrele(ipnetif_t *ipnetif) 1719 { 1720 mutex_enter(&ipnetif->if_reflock); 1721 ASSERT(ipnetif->if_refcnt != 0); 1722 if (--ipnetif->if_refcnt == 0) 1723 ipnet_free_if(ipnetif); 1724 else 1725 mutex_exit(&ipnetif->if_reflock); 1726 } 1727 1728 static void 1729 ipnet_walkers_inc(ipnet_stack_t *ips) 1730 { 1731 mutex_enter(&ips->ips_walkers_lock); 1732 ips->ips_walkers_cnt++; 1733 mutex_exit(&ips->ips_walkers_lock); 1734 } 1735 1736 static void 1737 ipnet_walkers_dec(ipnet_stack_t *ips) 1738 { 1739 mutex_enter(&ips->ips_walkers_lock); 1740 ASSERT(ips->ips_walkers_cnt != 0); 1741 if (--ips->ips_walkers_cnt == 0) 1742 cv_broadcast(&ips->ips_walkers_cv); 1743 mutex_exit(&ips->ips_walkers_lock); 1744 } 1745