1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * The ipnet device defined here provides access to packets at the IP layer. To 29 * provide access to packets at this layer it registers a callback function in 30 * the ip module and when there are open instances of the device ip will pass 31 * packets into the device. Packets from ip are passed on the input, output and 32 * loopback paths. Internally the module returns to ip as soon as possible by 33 * deferring processing using a taskq. 34 * 35 * Management of the devices in /dev/ipnet/ is handled by the devname 36 * filesystem and use of the neti interfaces. This module registers for NIC 37 * events using the neti framework so that when IP interfaces are brought up, 38 * taken down etc. the ipnet module is notified and its view of the interfaces 39 * configured on the system adjusted. On attach, the module gets an initial 40 * view of the system again using the neti framework but as it has already 41 * registered for IP interface events, it is still up-to-date with any changes. 
42 */ 43 44 #include <sys/types.h> 45 #include <sys/conf.h> 46 #include <sys/cred.h> 47 #include <sys/stat.h> 48 #include <sys/ddi.h> 49 #include <sys/sunddi.h> 50 #include <sys/modctl.h> 51 #include <sys/dlpi.h> 52 #include <sys/strsun.h> 53 #include <sys/id_space.h> 54 #include <sys/kmem.h> 55 #include <sys/mkdev.h> 56 #include <sys/neti.h> 57 #include <net/if.h> 58 #include <sys/errno.h> 59 #include <sys/list.h> 60 #include <sys/ksynch.h> 61 #include <sys/hook_event.h> 62 #include <sys/stropts.h> 63 #include <sys/sysmacros.h> 64 #include <inet/ip.h> 65 #include <inet/ip_multi.h> 66 #include <inet/ip6.h> 67 #include <inet/ipnet.h> 68 69 static struct module_info ipnet_minfo = { 70 1, /* mi_idnum */ 71 "ipnet", /* mi_idname */ 72 0, /* mi_minpsz */ 73 INFPSZ, /* mi_maxpsz */ 74 2048, /* mi_hiwat */ 75 0 /* mi_lowat */ 76 }; 77 78 /* 79 * List to hold static view of ipnetif_t's on the system. This is needed to 80 * avoid holding the lock protecting the avl tree of ipnetif's over the 81 * callback into the dev filesystem. 82 */ 83 typedef struct ipnetif_cbdata { 84 char ic_ifname[LIFNAMSIZ]; 85 dev_t ic_dev; 86 list_node_t ic_next; 87 } ipnetif_cbdata_t; 88 89 /* 90 * Convenience enumerated type for ipnet_accept(). It describes the 91 * properties of a given ipnet_addrp_t relative to a single ipnet_t 92 * client stream. The values represent whether the address is ... 93 */ 94 typedef enum { 95 IPNETADDR_MYADDR, /* an address on my ipnetif_t. */ 96 IPNETADDR_MBCAST, /* a multicast or broadcast address. */ 97 IPNETADDR_UNKNOWN /* none of the above. */ 98 } ipnet_addrtype_t; 99 100 /* Argument used for the ipnet_nicevent_taskq callback. 
*/ 101 typedef struct ipnet_nicevent_s { 102 nic_event_t ipne_event; 103 net_handle_t ipne_protocol; 104 netstackid_t ipne_stackid; 105 uint64_t ipne_ifindex; 106 uint64_t ipne_lifindex; 107 char ipne_ifname[LIFNAMSIZ]; 108 } ipnet_nicevent_t; 109 110 static dev_info_t *ipnet_dip; 111 static major_t ipnet_major; 112 static ddi_taskq_t *ipnet_taskq; /* taskq for packets */ 113 static ddi_taskq_t *ipnet_nicevent_taskq; /* taskq for NIC events */ 114 static id_space_t *ipnet_minor_space; 115 static const int IPNET_MINOR_LO = 1; /* minor number for /dev/lo0 */ 116 static const int IPNET_MINOR_MIN = 2; /* start of dynamic minors */ 117 static dl_info_ack_t ipnet_infoack = IPNET_INFO_ACK_INIT; 118 static ipnet_acceptfn_t ipnet_accept, ipnet_loaccept; 119 120 static void ipnet_input(mblk_t *); 121 static int ipnet_wput(queue_t *, mblk_t *); 122 static int ipnet_rsrv(queue_t *); 123 static int ipnet_open(queue_t *, dev_t *, int, int, cred_t *); 124 static int ipnet_close(queue_t *); 125 static void ipnet_ioctl(queue_t *, mblk_t *); 126 static void ipnet_iocdata(queue_t *, mblk_t *); 127 static void ipnet_wputnondata(queue_t *, mblk_t *); 128 static int ipnet_attach(dev_info_t *, ddi_attach_cmd_t); 129 static int ipnet_detach(dev_info_t *, ddi_detach_cmd_t); 130 static int ipnet_devinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 131 static void ipnet_inforeq(queue_t *q, mblk_t *mp); 132 static void ipnet_bindreq(queue_t *q, mblk_t *mp); 133 static void ipnet_unbindreq(queue_t *q, mblk_t *mp); 134 static void ipnet_dlpromisconreq(queue_t *q, mblk_t *mp); 135 static void ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp); 136 static int ipnet_join_allmulti(ipnetif_t *, ipnet_stack_t *); 137 static void ipnet_leave_allmulti(ipnetif_t *, ipnet_stack_t *); 138 static int ipnet_nicevent_cb(hook_event_token_t, hook_data_t, void *); 139 static void ipnet_nicevent_task(void *); 140 static ipnetif_t *ipnet_create_if(const char *, uint64_t, ipnet_stack_t *); 141 static void 
ipnet_remove_if(ipnetif_t *, ipnet_stack_t *); 142 static ipnetif_addr_t *ipnet_match_lif(ipnetif_t *, lif_if_t, boolean_t); 143 static ipnetif_t *ipnet_if_getby_index(uint64_t, ipnet_stack_t *); 144 static ipnetif_t *ipnet_if_getby_dev(dev_t, ipnet_stack_t *); 145 static boolean_t ipnet_if_in_zone(ipnetif_t *, zoneid_t, ipnet_stack_t *); 146 static void ipnet_if_zonecheck(ipnetif_t *, ipnet_stack_t *); 147 static int ipnet_populate_if(net_handle_t, ipnet_stack_t *, boolean_t); 148 static int ipnet_if_compare_name(const void *, const void *); 149 static int ipnet_if_compare_index(const void *, const void *); 150 static void ipnet_add_ifaddr(uint64_t, ipnetif_t *, net_handle_t); 151 static void ipnet_delete_ifaddr(ipnetif_addr_t *, ipnetif_t *, boolean_t); 152 static void ipnetif_refhold(ipnetif_t *); 153 static void ipnetif_refrele(ipnetif_t *); 154 static void ipnet_walkers_inc(ipnet_stack_t *); 155 static void ipnet_walkers_dec(ipnet_stack_t *); 156 static void ipnet_register_netihook(ipnet_stack_t *); 157 static void *ipnet_stack_init(netstackid_t, netstack_t *); 158 static void ipnet_stack_fini(netstackid_t, void *); 159 160 static struct qinit ipnet_rinit = { 161 NULL, /* qi_putp */ 162 ipnet_rsrv, /* qi_srvp */ 163 ipnet_open, /* qi_qopen */ 164 ipnet_close, /* qi_qclose */ 165 NULL, /* qi_qadmin */ 166 &ipnet_minfo, /* qi_minfo */ 167 }; 168 169 static struct qinit ipnet_winit = { 170 ipnet_wput, /* qi_putp */ 171 NULL, /* qi_srvp */ 172 NULL, /* qi_qopen */ 173 NULL, /* qi_qclose */ 174 NULL, /* qi_qadmin */ 175 &ipnet_minfo, /* qi_minfo */ 176 }; 177 178 static struct streamtab ipnet_info = { 179 &ipnet_rinit, &ipnet_winit 180 }; 181 182 DDI_DEFINE_STREAM_OPS(ipnet_ops, nulldev, nulldev, ipnet_attach, 183 ipnet_detach, nodev, ipnet_devinfo, D_MP | D_MTPERMOD, &ipnet_info, 184 ddi_quiesce_not_supported); 185 186 static struct modldrv modldrv = { 187 &mod_driverops, 188 "STREAMS ipnet driver", 189 &ipnet_ops 190 }; 191 192 static struct modlinkage modlinkage 
= { 193 MODREV_1, &modldrv, NULL 194 }; 195 196 /* 197 * Walk the list of physical interfaces on the machine, for each 198 * interface create a new ipnetif_t and add any addresses to it. We 199 * need to do the walk twice, once for IPv4 and once for IPv6. 200 * 201 * The interfaces are destroyed as part of ipnet_stack_fini() for each 202 * stack. Note that we cannot do this initialization in 203 * ipnet_stack_init(), since ipnet_stack_init() cannot fail. 204 */ 205 static int 206 ipnet_if_init(void) 207 { 208 netstack_handle_t nh; 209 netstack_t *ns; 210 ipnet_stack_t *ips; 211 int ret = 0; 212 213 netstack_next_init(&nh); 214 while ((ns = netstack_next(&nh)) != NULL) { 215 ips = ns->netstack_ipnet; 216 if ((ret = ipnet_populate_if(ips->ips_ndv4, ips, B_FALSE)) == 0) 217 ret = ipnet_populate_if(ips->ips_ndv6, ips, B_TRUE); 218 netstack_rele(ns); 219 if (ret != 0) 220 break; 221 } 222 netstack_next_fini(&nh); 223 return (ret); 224 } 225 226 /* 227 * Standard module entry points. 228 */ 229 int 230 _init(void) 231 { 232 int ret; 233 boolean_t netstack_registered = B_FALSE; 234 235 if ((ipnet_major = ddi_name_to_major("ipnet")) == (major_t)-1) 236 return (ENODEV); 237 ipnet_minor_space = id_space_create("ipnet_minor_space", 238 IPNET_MINOR_MIN, MAXMIN32); 239 240 /* 241 * We call ddi_taskq_create() with nthread == 1 to ensure in-order 242 * delivery of packets to clients. Note that we need to create the 243 * taskqs before calling netstack_register() since ipnet_stack_init() 244 * registers callbacks that use 'em. 
245 */ 246 ipnet_taskq = ddi_taskq_create(NULL, "ipnet", 1, TASKQ_DEFAULTPRI, 0); 247 ipnet_nicevent_taskq = ddi_taskq_create(NULL, "ipnet_nic_event_queue", 248 1, TASKQ_DEFAULTPRI, 0); 249 if (ipnet_taskq == NULL || ipnet_nicevent_taskq == NULL) { 250 ret = ENOMEM; 251 goto done; 252 } 253 254 netstack_register(NS_IPNET, ipnet_stack_init, NULL, ipnet_stack_fini); 255 netstack_registered = B_TRUE; 256 257 if ((ret = ipnet_if_init()) == 0) 258 ret = mod_install(&modlinkage); 259 done: 260 if (ret != 0) { 261 if (ipnet_taskq != NULL) 262 ddi_taskq_destroy(ipnet_taskq); 263 if (ipnet_nicevent_taskq != NULL) 264 ddi_taskq_destroy(ipnet_nicevent_taskq); 265 if (netstack_registered) 266 netstack_unregister(NS_IPNET); 267 id_space_destroy(ipnet_minor_space); 268 } 269 return (ret); 270 } 271 272 int 273 _fini(void) 274 { 275 int err; 276 277 if ((err = mod_remove(&modlinkage)) != 0) 278 return (err); 279 280 netstack_unregister(NS_IPNET); 281 ddi_taskq_destroy(ipnet_nicevent_taskq); 282 ddi_taskq_destroy(ipnet_taskq); 283 id_space_destroy(ipnet_minor_space); 284 return (0); 285 } 286 287 int 288 _info(struct modinfo *modinfop) 289 { 290 return (mod_info(&modlinkage, modinfop)); 291 } 292 293 static void 294 ipnet_register_netihook(ipnet_stack_t *ips) 295 { 296 int ret; 297 zoneid_t zoneid; 298 netid_t netid; 299 300 HOOK_INIT(ips->ips_nicevents, ipnet_nicevent_cb, "ipnet_nicevents", 301 ips); 302 303 /* 304 * It is possible for an exclusive stack to be in the process of 305 * shutting down here, and the netid and protocol lookups could fail 306 * in that case. 
307 */ 308 zoneid = netstackid_to_zoneid(ips->ips_netstack->netstack_stackid); 309 if ((netid = net_zoneidtonetid(zoneid)) == -1) 310 return; 311 312 if ((ips->ips_ndv4 = net_protocol_lookup(netid, NHF_INET)) != NULL) { 313 if ((ret = net_hook_register(ips->ips_ndv4, NH_NIC_EVENTS, 314 ips->ips_nicevents)) != 0) { 315 VERIFY(net_protocol_release(ips->ips_ndv4) == 0); 316 ips->ips_ndv4 = NULL; 317 cmn_err(CE_WARN, "unable to register IPv4 netinfo hooks" 318 " in zone %d: %d", zoneid, ret); 319 } 320 } 321 if ((ips->ips_ndv6 = net_protocol_lookup(netid, NHF_INET6)) != NULL) { 322 if ((ret = net_hook_register(ips->ips_ndv6, NH_NIC_EVENTS, 323 ips->ips_nicevents)) != 0) { 324 VERIFY(net_protocol_release(ips->ips_ndv6) == 0); 325 ips->ips_ndv6 = NULL; 326 cmn_err(CE_WARN, "unable to register IPv6 netinfo hooks" 327 " in zone %d: %d", zoneid, ret); 328 } 329 } 330 } 331 332 /* 333 * This function is called on attach to build an initial view of the 334 * interfaces on the system. It will be called once for IPv4 and once 335 * for IPv6, although there is only one ipnet interface for both IPv4 336 * and IPv6 there are separate address lists. 337 */ 338 static int 339 ipnet_populate_if(net_handle_t nd, ipnet_stack_t *ips, boolean_t isv6) 340 { 341 phy_if_t phyif; 342 lif_if_t lif; 343 ipnetif_t *ipnetif; 344 char name[LIFNAMSIZ]; 345 boolean_t new_if = B_FALSE; 346 uint64_t ifflags; 347 int ret = 0; 348 349 /* 350 * If ipnet_register_netihook() was unable to initialize this 351 * stack's net_handle_t, then we cannot populate any interface 352 * information. This usually happens when we attempted to 353 * grab a net_handle_t as a stack was shutting down. We don't 354 * want to fail the entire _init() operation because of a 355 * stack shutdown (other stacks will continue to work just 356 * fine), so we silently return success here. 
357 */ 358 if (nd == NULL) 359 return (0); 360 361 /* 362 * Make sure we're not processing NIC events during the 363 * population of our interfaces and address lists. 364 */ 365 mutex_enter(&ips->ips_event_lock); 366 367 for (phyif = net_phygetnext(nd, 0); phyif != 0; 368 phyif = net_phygetnext(nd, phyif)) { 369 if (net_getifname(nd, phyif, name, LIFNAMSIZ) != 0) 370 continue; 371 if ((ipnetif = ipnet_if_getby_index(phyif, ips)) == NULL) { 372 ipnetif = ipnet_create_if(name, phyif, ips); 373 if (ipnetif == NULL) { 374 ret = ENOMEM; 375 goto done; 376 } 377 new_if = B_TRUE; 378 } 379 ipnetif->if_flags |= 380 isv6 ? IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED; 381 382 for (lif = net_lifgetnext(nd, phyif, 0); lif != 0; 383 lif = net_lifgetnext(nd, phyif, lif)) { 384 /* 385 * Skip addresses that aren't up. We'll add 386 * them when we receive an NE_LIF_UP event. 387 */ 388 if (net_getlifflags(nd, phyif, lif, &ifflags) != 0 || 389 !(ifflags & IFF_UP)) 390 continue; 391 /* Don't add it if we already have it. 
*/ 392 if (ipnet_match_lif(ipnetif, lif, isv6) != NULL) 393 continue; 394 ipnet_add_ifaddr(lif, ipnetif, nd); 395 } 396 if (!new_if) 397 ipnetif_refrele(ipnetif); 398 } 399 400 done: 401 mutex_exit(&ips->ips_event_lock); 402 return (ret); 403 } 404 405 static int 406 ipnet_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 407 { 408 if (cmd != DDI_ATTACH) 409 return (DDI_FAILURE); 410 411 if (ddi_create_minor_node(dip, "lo0", S_IFCHR, IPNET_MINOR_LO, 412 DDI_PSEUDO, 0) == DDI_FAILURE) 413 return (DDI_FAILURE); 414 415 ipnet_dip = dip; 416 return (DDI_SUCCESS); 417 } 418 419 static int 420 ipnet_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 421 { 422 if (cmd != DDI_DETACH) 423 return (DDI_FAILURE); 424 425 ASSERT(dip == ipnet_dip); 426 ddi_remove_minor_node(ipnet_dip, NULL); 427 ipnet_dip = NULL; 428 return (DDI_SUCCESS); 429 } 430 431 /* ARGSUSED */ 432 static int 433 ipnet_devinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 434 { 435 int error = DDI_FAILURE; 436 437 switch (infocmd) { 438 case DDI_INFO_DEVT2INSTANCE: 439 *result = (void *)0; 440 error = DDI_SUCCESS; 441 break; 442 case DDI_INFO_DEVT2DEVINFO: 443 if (ipnet_dip != NULL) { 444 *result = ipnet_dip; 445 error = DDI_SUCCESS; 446 } 447 break; 448 } 449 return (error); 450 } 451 452 /* ARGSUSED */ 453 static int 454 ipnet_open(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp) 455 { 456 ipnet_t *ipnet; 457 netstack_t *ns = NULL; 458 ipnet_stack_t *ips; 459 int err = 0; 460 zoneid_t zoneid = crgetzoneid(crp); 461 462 /* 463 * If the system is labeled, only the global zone is allowed to open 464 * IP observability nodes. 465 */ 466 if (is_system_labeled() && zoneid != GLOBAL_ZONEID) 467 return (EACCES); 468 469 /* We don't support open as a module */ 470 if (sflag & MODOPEN) 471 return (ENOTSUP); 472 473 /* This driver is self-cloning, we don't support re-open. 
*/ 474 if (rq->q_ptr != NULL) 475 return (EBUSY); 476 477 if ((ipnet = kmem_zalloc(sizeof (*ipnet), KM_NOSLEEP)) == NULL) 478 return (ENOMEM); 479 480 VERIFY((ns = netstack_find_by_cred(crp)) != NULL); 481 ips = ns->netstack_ipnet; 482 483 rq->q_ptr = WR(rq)->q_ptr = ipnet; 484 ipnet->ipnet_rq = rq; 485 ipnet->ipnet_minor = (minor_t)id_alloc(ipnet_minor_space); 486 ipnet->ipnet_zoneid = zoneid; 487 ipnet->ipnet_dlstate = DL_UNBOUND; 488 ipnet->ipnet_sap = 0; 489 ipnet->ipnet_ns = ns; 490 491 /* 492 * We need to hold ips_event_lock here as any NE_LIF_DOWN events need 493 * to be processed after ipnet_if is set and the ipnet_t has been 494 * inserted in the ips_str_list. 495 */ 496 mutex_enter(&ips->ips_event_lock); 497 if (getminor(*dev) == IPNET_MINOR_LO) { 498 ipnet->ipnet_flags |= IPNET_LOMODE; 499 ipnet->ipnet_acceptfn = ipnet_loaccept; 500 } else { 501 ipnet->ipnet_acceptfn = ipnet_accept; 502 ipnet->ipnet_if = ipnet_if_getby_dev(*dev, ips); 503 if (ipnet->ipnet_if == NULL || 504 !ipnet_if_in_zone(ipnet->ipnet_if, zoneid, ips)) { 505 err = ENODEV; 506 goto done; 507 } 508 } 509 510 mutex_enter(&ips->ips_walkers_lock); 511 while (ips->ips_walkers_cnt != 0) 512 cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock); 513 list_insert_head(&ips->ips_str_list, ipnet); 514 *dev = makedevice(getmajor(*dev), ipnet->ipnet_minor); 515 qprocson(rq); 516 517 /* 518 * Only register our callback if we're the first open client; we call 519 * unregister in close() for the last open client. 
520 */ 521 if (list_head(&ips->ips_str_list) == list_tail(&ips->ips_str_list)) 522 ipobs_register_hook(ns, ipnet_input); 523 mutex_exit(&ips->ips_walkers_lock); 524 525 done: 526 mutex_exit(&ips->ips_event_lock); 527 if (err != 0) { 528 netstack_rele(ns); 529 id_free(ipnet_minor_space, ipnet->ipnet_minor); 530 if (ipnet->ipnet_if != NULL) 531 ipnetif_refrele(ipnet->ipnet_if); 532 kmem_free(ipnet, sizeof (*ipnet)); 533 } 534 return (err); 535 } 536 537 static int 538 ipnet_close(queue_t *rq) 539 { 540 ipnet_t *ipnet = rq->q_ptr; 541 ipnet_stack_t *ips = ipnet->ipnet_ns->netstack_ipnet; 542 543 if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS) 544 ipnet_leave_allmulti(ipnet->ipnet_if, ips); 545 if (ipnet->ipnet_flags & IPNET_PROMISC_MULTI) 546 ipnet_leave_allmulti(ipnet->ipnet_if, ips); 547 548 mutex_enter(&ips->ips_walkers_lock); 549 while (ips->ips_walkers_cnt != 0) 550 cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock); 551 552 qprocsoff(rq); 553 554 list_remove(&ips->ips_str_list, ipnet); 555 if (ipnet->ipnet_if != NULL) 556 ipnetif_refrele(ipnet->ipnet_if); 557 id_free(ipnet_minor_space, ipnet->ipnet_minor); 558 kmem_free(ipnet, sizeof (*ipnet)); 559 560 if (list_is_empty(&ips->ips_str_list)) 561 ipobs_unregister_hook(ips->ips_netstack, ipnet_input); 562 563 mutex_exit(&ips->ips_walkers_lock); 564 netstack_rele(ips->ips_netstack); 565 return (0); 566 } 567 568 static int 569 ipnet_wput(queue_t *q, mblk_t *mp) 570 { 571 switch (mp->b_datap->db_type) { 572 case M_FLUSH: 573 if (*mp->b_rptr & FLUSHW) { 574 flushq(q, FLUSHDATA); 575 *mp->b_rptr &= ~FLUSHW; 576 } 577 if (*mp->b_rptr & FLUSHR) 578 qreply(q, mp); 579 else 580 freemsg(mp); 581 break; 582 case M_PROTO: 583 case M_PCPROTO: 584 ipnet_wputnondata(q, mp); 585 break; 586 case M_IOCTL: 587 ipnet_ioctl(q, mp); 588 break; 589 case M_IOCDATA: 590 ipnet_iocdata(q, mp); 591 break; 592 default: 593 freemsg(mp); 594 break; 595 } 596 return (0); 597 } 598 599 static int 600 ipnet_rsrv(queue_t *q) 601 { 602 mblk_t *mp; 
603 604 while ((mp = getq(q)) != NULL) { 605 ASSERT(DB_TYPE(mp) == M_DATA); 606 if (canputnext(q)) { 607 putnext(q, mp); 608 } else { 609 (void) putbq(q, mp); 610 break; 611 } 612 } 613 return (0); 614 } 615 616 static void 617 ipnet_ioctl(queue_t *q, mblk_t *mp) 618 { 619 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 620 621 switch (iocp->ioc_cmd) { 622 case DLIOCRAW: 623 miocack(q, mp, 0, 0); 624 break; 625 case DLIOCIPNETINFO: 626 if (iocp->ioc_count == TRANSPARENT) { 627 mcopyin(mp, NULL, sizeof (uint_t), NULL); 628 qreply(q, mp); 629 break; 630 } 631 /* Fallthrough, we don't support I_STR with DLIOCIPNETINFO. */ 632 default: 633 miocnak(q, mp, 0, EINVAL); 634 break; 635 } 636 } 637 638 static void 639 ipnet_iocdata(queue_t *q, mblk_t *mp) 640 { 641 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 642 ipnet_t *ipnet = q->q_ptr; 643 644 switch (iocp->ioc_cmd) { 645 case DLIOCIPNETINFO: 646 if (*(int *)mp->b_cont->b_rptr == 1) 647 ipnet->ipnet_flags |= IPNET_INFO; 648 else if (*(int *)mp->b_cont->b_rptr == 0) 649 ipnet->ipnet_flags &= ~IPNET_INFO; 650 else 651 goto iocnak; 652 miocack(q, mp, 0, DL_IPNETINFO_VERSION); 653 break; 654 default: 655 iocnak: 656 miocnak(q, mp, 0, EINVAL); 657 break; 658 } 659 } 660 661 static void 662 ipnet_wputnondata(queue_t *q, mblk_t *mp) 663 { 664 union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr; 665 t_uscalar_t prim = dlp->dl_primitive; 666 667 switch (prim) { 668 case DL_INFO_REQ: 669 ipnet_inforeq(q, mp); 670 break; 671 case DL_UNBIND_REQ: 672 ipnet_unbindreq(q, mp); 673 break; 674 case DL_BIND_REQ: 675 ipnet_bindreq(q, mp); 676 break; 677 case DL_PROMISCON_REQ: 678 ipnet_dlpromisconreq(q, mp); 679 break; 680 case DL_PROMISCOFF_REQ: 681 ipnet_dlpromiscoffreq(q, mp); 682 break; 683 case DL_UNITDATA_REQ: 684 case DL_DETACH_REQ: 685 case DL_PHYS_ADDR_REQ: 686 case DL_SET_PHYS_ADDR_REQ: 687 case DL_ENABMULTI_REQ: 688 case DL_DISABMULTI_REQ: 689 case DL_ATTACH_REQ: 690 dlerrorack(q, mp, prim, DL_UNSUPPORTED, 0); 
691 break; 692 default: 693 dlerrorack(q, mp, prim, DL_BADPRIM, 0); 694 break; 695 } 696 } 697 698 static void 699 ipnet_inforeq(queue_t *q, mblk_t *mp) 700 { 701 dl_info_ack_t *dlip; 702 size_t size = sizeof (dl_info_ack_t) + sizeof (ushort_t); 703 704 if (MBLKL(mp) < DL_INFO_REQ_SIZE) { 705 dlerrorack(q, mp, DL_INFO_REQ, DL_BADPRIM, 0); 706 return; 707 } 708 709 if ((mp = mexchange(q, mp, size, M_PCPROTO, DL_INFO_ACK)) == NULL) 710 return; 711 712 dlip = (dl_info_ack_t *)mp->b_rptr; 713 *dlip = ipnet_infoack; 714 qreply(q, mp); 715 } 716 717 static void 718 ipnet_bindreq(queue_t *q, mblk_t *mp) 719 { 720 union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr; 721 int32_t sap; 722 ipnet_t *ipnet = q->q_ptr; 723 724 if (MBLKL(mp) < DL_BIND_REQ_SIZE) { 725 dlerrorack(q, mp, DL_BIND_REQ, DL_BADPRIM, 0); 726 return; 727 } 728 729 sap = dlp->bind_req.dl_sap; 730 if (sap != IPV4_VERSION && sap != IPV6_VERSION && sap != 0) { 731 dlerrorack(q, mp, DL_BIND_REQ, DL_BADSAP, 0); 732 } else { 733 ipnet->ipnet_sap = sap; 734 ipnet->ipnet_dlstate = DL_IDLE; 735 dlbindack(q, mp, sap, 0, 0, 0, 0); 736 } 737 } 738 739 static void 740 ipnet_unbindreq(queue_t *q, mblk_t *mp) 741 { 742 ipnet_t *ipnet = q->q_ptr; 743 744 if (MBLKL(mp) < DL_UNBIND_REQ_SIZE) { 745 dlerrorack(q, mp, DL_UNBIND_REQ, DL_BADPRIM, 0); 746 return; 747 } 748 749 if (ipnet->ipnet_dlstate != DL_IDLE) { 750 dlerrorack(q, mp, DL_UNBIND_REQ, DL_OUTSTATE, 0); 751 } else { 752 ipnet->ipnet_dlstate = DL_UNBOUND; 753 ipnet->ipnet_sap = 0; 754 dlokack(q, mp, DL_UNBIND_REQ); 755 } 756 } 757 758 static void 759 ipnet_dlpromisconreq(queue_t *q, mblk_t *mp) 760 { 761 ipnet_t *ipnet = q->q_ptr; 762 t_uscalar_t level; 763 int err; 764 765 if (MBLKL(mp) < DL_PROMISCON_REQ_SIZE) { 766 dlerrorack(q, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0); 767 return; 768 } 769 770 if (ipnet->ipnet_flags & IPNET_LOMODE) { 771 dlokack(q, mp, DL_PROMISCON_REQ); 772 return; 773 } 774 775 level = ((dl_promiscon_req_t *)mp->b_rptr)->dl_level; 776 if 
(level == DL_PROMISC_PHYS || level == DL_PROMISC_MULTI) { 777 if ((err = ipnet_join_allmulti(ipnet->ipnet_if, 778 ipnet->ipnet_ns->netstack_ipnet)) != 0) { 779 dlerrorack(q, mp, DL_PROMISCON_REQ, DL_SYSERR, err); 780 return; 781 } 782 } 783 784 switch (level) { 785 case DL_PROMISC_PHYS: 786 ipnet->ipnet_flags |= IPNET_PROMISC_PHYS; 787 break; 788 case DL_PROMISC_SAP: 789 ipnet->ipnet_flags |= IPNET_PROMISC_SAP; 790 break; 791 case DL_PROMISC_MULTI: 792 ipnet->ipnet_flags |= IPNET_PROMISC_MULTI; 793 break; 794 default: 795 dlerrorack(q, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0); 796 return; 797 } 798 799 dlokack(q, mp, DL_PROMISCON_REQ); 800 } 801 802 static void 803 ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp) 804 { 805 ipnet_t *ipnet = q->q_ptr; 806 t_uscalar_t level; 807 uint16_t orig_ipnet_flags = ipnet->ipnet_flags; 808 809 if (MBLKL(mp) < DL_PROMISCOFF_REQ_SIZE) { 810 dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0); 811 return; 812 } 813 814 if (ipnet->ipnet_flags & IPNET_LOMODE) { 815 dlokack(q, mp, DL_PROMISCOFF_REQ); 816 return; 817 } 818 819 level = ((dl_promiscon_req_t *)mp->b_rptr)->dl_level; 820 switch (level) { 821 case DL_PROMISC_PHYS: 822 if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS) 823 ipnet->ipnet_flags &= ~IPNET_PROMISC_PHYS; 824 break; 825 case DL_PROMISC_SAP: 826 if (ipnet->ipnet_flags & IPNET_PROMISC_SAP) 827 ipnet->ipnet_flags &= ~IPNET_PROMISC_SAP; 828 break; 829 case DL_PROMISC_MULTI: 830 if (ipnet->ipnet_flags & IPNET_PROMISC_MULTI) 831 ipnet->ipnet_flags &= ~IPNET_PROMISC_MULTI; 832 break; 833 default: 834 dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0); 835 return; 836 } 837 838 if (orig_ipnet_flags == ipnet->ipnet_flags) { 839 dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_NOTENAB, 0); 840 return; 841 } 842 843 if (level == DL_PROMISC_PHYS || level == DL_PROMISC_MULTI) { 844 ipnet_leave_allmulti(ipnet->ipnet_if, 845 ipnet->ipnet_ns->netstack_ipnet); 846 } 847 848 dlokack(q, mp, DL_PROMISCOFF_REQ); 849 } 850 851 static int 852 
ipnet_join_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips) 853 { 854 int err = 0; 855 ip_stack_t *ipst = ips->ips_netstack->netstack_ip; 856 uint64_t index = ipnetif->if_index; 857 858 mutex_enter(&ips->ips_event_lock); 859 if (ipnetif->if_multicnt == 0) { 860 ASSERT((ipnetif->if_flags & 861 (IPNETIF_IPV4ALLMULTI | IPNETIF_IPV6ALLMULTI)) == 0); 862 if (ipnetif->if_flags & IPNETIF_IPV4PLUMBED) { 863 err = ip_join_allmulti(index, B_FALSE, ipst); 864 if (err != 0) 865 goto done; 866 ipnetif->if_flags |= IPNETIF_IPV4ALLMULTI; 867 } 868 if (ipnetif->if_flags & IPNETIF_IPV6PLUMBED) { 869 err = ip_join_allmulti(index, B_TRUE, ipst); 870 if (err != 0 && 871 (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI)) { 872 (void) ip_leave_allmulti(index, B_FALSE, ipst); 873 ipnetif->if_flags &= ~IPNETIF_IPV4ALLMULTI; 874 goto done; 875 } 876 ipnetif->if_flags |= IPNETIF_IPV6ALLMULTI; 877 } 878 } 879 ipnetif->if_multicnt++; 880 881 done: 882 mutex_exit(&ips->ips_event_lock); 883 return (err); 884 } 885 886 static void 887 ipnet_leave_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips) 888 { 889 int err; 890 ip_stack_t *ipst = ips->ips_netstack->netstack_ip; 891 uint64_t index = ipnetif->if_index; 892 893 mutex_enter(&ips->ips_event_lock); 894 ASSERT(ipnetif->if_multicnt != 0); 895 if (--ipnetif->if_multicnt == 0) { 896 if (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI) { 897 err = ip_leave_allmulti(index, B_FALSE, ipst); 898 ASSERT(err == 0 || err == ENODEV); 899 ipnetif->if_flags &= ~IPNETIF_IPV4ALLMULTI; 900 } 901 if (ipnetif->if_flags & IPNETIF_IPV6ALLMULTI) { 902 err = ip_leave_allmulti(index, B_TRUE, ipst); 903 ASSERT(err == 0 || err == ENODEV); 904 ipnetif->if_flags &= ~IPNETIF_IPV6ALLMULTI; 905 } 906 } 907 mutex_exit(&ips->ips_event_lock); 908 } 909 910 static mblk_t * 911 ipnet_addheader(ipobs_hook_data_t *ihd, mblk_t *mp) 912 { 913 mblk_t *dlhdr; 914 dl_ipnetinfo_t *dl; 915 916 if ((dlhdr = allocb(sizeof (dl_ipnetinfo_t), BPRI_HI)) == NULL) { 917 freemsg(mp); 918 return (NULL); 919 } 920 
dl = (dl_ipnetinfo_t *)dlhdr->b_rptr; 921 dl->dli_version = DL_IPNETINFO_VERSION; 922 dl->dli_len = htons(sizeof (*dl)); 923 dl->dli_ipver = ihd->ihd_ipver; 924 dl->dli_srczone = BE_64((uint64_t)ihd->ihd_zsrc); 925 dl->dli_dstzone = BE_64((uint64_t)ihd->ihd_zdst); 926 dlhdr->b_wptr += sizeof (*dl); 927 dlhdr->b_cont = mp; 928 929 return (dlhdr); 930 } 931 932 static ipnet_addrtype_t 933 ipnet_get_addrtype(ipnet_t *ipnet, ipnet_addrp_t *addr) 934 { 935 list_t *list; 936 ipnetif_t *ipnetif = ipnet->ipnet_if; 937 ipnetif_addr_t *ifaddr; 938 ipnet_addrtype_t addrtype = IPNETADDR_UNKNOWN; 939 940 /* First check if the address is multicast or limited broadcast. */ 941 switch (addr->iap_family) { 942 case AF_INET: 943 if (CLASSD(*(addr->iap_addr4)) || 944 *(addr->iap_addr4) == INADDR_BROADCAST) 945 return (IPNETADDR_MBCAST); 946 break; 947 case AF_INET6: 948 if (IN6_IS_ADDR_MULTICAST(addr->iap_addr6)) 949 return (IPNETADDR_MBCAST); 950 break; 951 } 952 953 /* 954 * Walk the address list to see if the address belongs to our 955 * interface or is one of our subnet broadcast addresses. 956 */ 957 mutex_enter(&ipnetif->if_addr_lock); 958 list = (addr->iap_family == AF_INET) ? 959 &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list; 960 for (ifaddr = list_head(list); 961 ifaddr != NULL && addrtype == IPNETADDR_UNKNOWN; 962 ifaddr = list_next(list, ifaddr)) { 963 /* 964 * If we're not in the global zone, then only look at 965 * addresses in our zone. 
966 */ 967 if (ipnet->ipnet_zoneid != GLOBAL_ZONEID && 968 ipnet->ipnet_zoneid != ifaddr->ifa_zone) 969 continue; 970 switch (addr->iap_family) { 971 case AF_INET: 972 if (ifaddr->ifa_ip4addr != INADDR_ANY && 973 *(addr->iap_addr4) == ifaddr->ifa_ip4addr) 974 addrtype = IPNETADDR_MYADDR; 975 else if (ifaddr->ifa_brdaddr != INADDR_ANY && 976 *(addr->iap_addr4) == ifaddr->ifa_brdaddr) 977 addrtype = IPNETADDR_MBCAST; 978 break; 979 case AF_INET6: 980 if (IN6_ARE_ADDR_EQUAL(addr->iap_addr6, 981 &ifaddr->ifa_ip6addr)) 982 addrtype = IPNETADDR_MYADDR; 983 break; 984 } 985 } 986 mutex_exit(&ipnetif->if_addr_lock); 987 988 return (addrtype); 989 } 990 991 /* 992 * Verify if the packet contained in ihd should be passed up to the 993 * ipnet client stream. 994 */ 995 static boolean_t 996 ipnet_accept(ipnet_t *ipnet, ipobs_hook_data_t *ihd, ipnet_addrp_t *src, 997 ipnet_addrp_t *dst) 998 { 999 boolean_t obsif; 1000 uint64_t ifindex = ipnet->ipnet_if->if_index; 1001 ipnet_addrtype_t srctype, dsttype; 1002 1003 srctype = ipnet_get_addrtype(ipnet, src); 1004 dsttype = ipnet_get_addrtype(ipnet, dst); 1005 1006 /* 1007 * If the packet's ifindex matches ours, or the packet's group ifindex 1008 * matches ours, it's on the interface we're observing. (Thus, 1009 * observing on the group ifindex matches all ifindexes in the group.) 1010 */ 1011 obsif = (ihd->ihd_ifindex == ifindex || ihd->ihd_grifindex == ifindex); 1012 1013 /* 1014 * Do not allow an ipnet stream to see packets that are not from or to 1015 * its zone. The exception is when zones are using the shared stack 1016 * model. In this case, streams in the global zone have visibility 1017 * into other shared-stack zones, and broadcast and multicast traffic 1018 * is visible by all zones in the stack. 
1019 */ 1020 if (ipnet->ipnet_zoneid != GLOBAL_ZONEID && 1021 dsttype != IPNETADDR_MBCAST) { 1022 if (ipnet->ipnet_zoneid != ihd->ihd_zsrc && 1023 ipnet->ipnet_zoneid != ihd->ihd_zdst) 1024 return (B_FALSE); 1025 } 1026 1027 /* 1028 * If DL_PROMISC_SAP isn't enabled, then the bound SAP must match the 1029 * packet's IP version. 1030 */ 1031 if (!(ipnet->ipnet_flags & IPNET_PROMISC_SAP) && 1032 ipnet->ipnet_sap != ihd->ihd_ipver) 1033 return (B_FALSE); 1034 1035 /* If the destination address is ours, then accept the packet. */ 1036 if (dsttype == IPNETADDR_MYADDR) 1037 return (B_TRUE); 1038 1039 /* 1040 * If DL_PROMISC_PHYS is enabled, then we can see all packets that are 1041 * sent or received on the interface we're observing, or packets that 1042 * have our source address (this allows us to see packets we send). 1043 */ 1044 if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS) { 1045 if (srctype == IPNETADDR_MYADDR || obsif) 1046 return (B_TRUE); 1047 } 1048 1049 /* 1050 * We accept multicast and broadcast packets transmitted or received 1051 * on the interface we're observing. 1052 */ 1053 if (dsttype == IPNETADDR_MBCAST && obsif) 1054 return (B_TRUE); 1055 1056 return (B_FALSE); 1057 } 1058 1059 /* 1060 * Verify if the packet contained in ihd should be passed up to the ipnet 1061 * client stream that's in IPNET_LOMODE. 1062 */ 1063 /* ARGSUSED */ 1064 static boolean_t 1065 ipnet_loaccept(ipnet_t *ipnet, ipobs_hook_data_t *ihd, ipnet_addrp_t *src, 1066 ipnet_addrp_t *dst) 1067 { 1068 if (ihd->ihd_htype != IPOBS_HOOK_LOCAL) 1069 return (B_FALSE); 1070 1071 /* 1072 * An ipnet stream must not see packets that are not from/to its zone. 
1073 */ 1074 if (ipnet->ipnet_zoneid != GLOBAL_ZONEID) { 1075 if (ipnet->ipnet_zoneid != ihd->ihd_zsrc && 1076 ipnet->ipnet_zoneid != ihd->ihd_zdst) 1077 return (B_FALSE); 1078 } 1079 1080 return (ipnet->ipnet_sap == 0 || ipnet->ipnet_sap == ihd->ihd_ipver); 1081 } 1082 1083 static void 1084 ipnet_dispatch(void *arg) 1085 { 1086 mblk_t *mp = arg; 1087 ipobs_hook_data_t *ihd = (ipobs_hook_data_t *)mp->b_rptr; 1088 ipnet_t *ipnet; 1089 mblk_t *netmp; 1090 list_t *list; 1091 ipnet_stack_t *ips = ihd->ihd_stack->netstack_ipnet; 1092 ipnet_addrp_t src, dst; 1093 1094 if (ihd->ihd_ipver == IPV4_VERSION) { 1095 src.iap_family = dst.iap_family = AF_INET; 1096 src.iap_addr4 = &((ipha_t *)(ihd->ihd_mp->b_rptr))->ipha_src; 1097 dst.iap_addr4 = &((ipha_t *)(ihd->ihd_mp->b_rptr))->ipha_dst; 1098 } else { 1099 src.iap_family = dst.iap_family = AF_INET6; 1100 src.iap_addr6 = &((ip6_t *)(ihd->ihd_mp->b_rptr))->ip6_src; 1101 dst.iap_addr6 = &((ip6_t *)(ihd->ihd_mp->b_rptr))->ip6_dst; 1102 } 1103 1104 ipnet_walkers_inc(ips); 1105 1106 list = &ips->ips_str_list; 1107 for (ipnet = list_head(list); ipnet != NULL; 1108 ipnet = list_next(list, ipnet)) { 1109 if (!(*ipnet->ipnet_acceptfn)(ipnet, ihd, &src, &dst)) 1110 continue; 1111 1112 if (list_next(list, ipnet) == NULL) { 1113 netmp = ihd->ihd_mp; 1114 ihd->ihd_mp = NULL; 1115 } else { 1116 if ((netmp = dupmsg(ihd->ihd_mp)) == NULL && 1117 (netmp = copymsg(ihd->ihd_mp)) == NULL) { 1118 atomic_inc_64(&ips->ips_drops); 1119 continue; 1120 } 1121 } 1122 1123 if (ipnet->ipnet_flags & IPNET_INFO) { 1124 if ((netmp = ipnet_addheader(ihd, netmp)) == NULL) { 1125 atomic_inc_64(&ips->ips_drops); 1126 continue; 1127 } 1128 } 1129 1130 if (ipnet->ipnet_rq->q_first == NULL && 1131 canputnext(ipnet->ipnet_rq)) { 1132 putnext(ipnet->ipnet_rq, netmp); 1133 } else if (canput(ipnet->ipnet_rq)) { 1134 (void) putq(ipnet->ipnet_rq, netmp); 1135 } else { 1136 freemsg(netmp); 1137 atomic_inc_64(&ips->ips_drops); 1138 } 1139 } 1140 1141 
ipnet_walkers_dec(ips); 1142 1143 freemsg(ihd->ihd_mp); 1144 freemsg(mp); 1145 } 1146 1147 static void 1148 ipnet_input(mblk_t *mp) 1149 { 1150 ipobs_hook_data_t *ihd = (ipobs_hook_data_t *)mp->b_rptr; 1151 1152 if (ddi_taskq_dispatch(ipnet_taskq, ipnet_dispatch, mp, DDI_NOSLEEP) != 1153 DDI_SUCCESS) { 1154 atomic_inc_64(&ihd->ihd_stack->netstack_ipnet->ips_drops); 1155 freemsg(ihd->ihd_mp); 1156 freemsg(mp); 1157 } 1158 } 1159 1160 /* 1161 * Create a new ipnetif_t and new minor node for it. If creation is 1162 * successful the new ipnetif_t is inserted into an avl_tree 1163 * containing ipnetif's for this stack instance. 1164 */ 1165 static ipnetif_t * 1166 ipnet_create_if(const char *name, uint64_t index, ipnet_stack_t *ips) 1167 { 1168 ipnetif_t *ipnetif; 1169 avl_index_t where = 0; 1170 minor_t ifminor; 1171 1172 /* 1173 * Because ipnet_create_if() can be called from a NIC event 1174 * callback, it should not block. 1175 */ 1176 ifminor = (minor_t)id_alloc_nosleep(ipnet_minor_space); 1177 if (ifminor == (minor_t)-1) 1178 return (NULL); 1179 if ((ipnetif = kmem_zalloc(sizeof (*ipnetif), KM_NOSLEEP)) == NULL) { 1180 id_free(ipnet_minor_space, ifminor); 1181 return (NULL); 1182 } 1183 1184 (void) strlcpy(ipnetif->if_name, name, LIFNAMSIZ); 1185 ipnetif->if_index = index; 1186 1187 mutex_init(&ipnetif->if_addr_lock, NULL, MUTEX_DEFAULT, 0); 1188 list_create(&ipnetif->if_ip4addr_list, sizeof (ipnetif_addr_t), 1189 offsetof(ipnetif_addr_t, ifa_link)); 1190 list_create(&ipnetif->if_ip6addr_list, sizeof (ipnetif_addr_t), 1191 offsetof(ipnetif_addr_t, ifa_link)); 1192 ipnetif->if_dev = makedevice(ipnet_major, ifminor); 1193 mutex_init(&ipnetif->if_reflock, NULL, MUTEX_DEFAULT, 0); 1194 ipnetif->if_refcnt = 1; 1195 1196 mutex_enter(&ips->ips_avl_lock); 1197 VERIFY(avl_find(&ips->ips_avl_by_index, &index, &where) == NULL); 1198 avl_insert(&ips->ips_avl_by_index, ipnetif, where); 1199 VERIFY(avl_find(&ips->ips_avl_by_name, (void *)name, &where) == NULL); 1200 
avl_insert(&ips->ips_avl_by_name, ipnetif, where); 1201 mutex_exit(&ips->ips_avl_lock); 1202 1203 return (ipnetif); 1204 } 1205 1206 static void 1207 ipnet_remove_if(ipnetif_t *ipnetif, ipnet_stack_t *ips) 1208 { 1209 ipnet_t *ipnet; 1210 1211 ipnet_walkers_inc(ips); 1212 /* Send a SIGHUP to all open streams associated with this ipnetif. */ 1213 for (ipnet = list_head(&ips->ips_str_list); ipnet != NULL; 1214 ipnet = list_next(&ips->ips_str_list, ipnet)) { 1215 if (ipnet->ipnet_if == ipnetif) 1216 (void) putnextctl(ipnet->ipnet_rq, M_HANGUP); 1217 } 1218 ipnet_walkers_dec(ips); 1219 mutex_enter(&ips->ips_avl_lock); 1220 avl_remove(&ips->ips_avl_by_index, ipnetif); 1221 avl_remove(&ips->ips_avl_by_name, ipnetif); 1222 mutex_exit(&ips->ips_avl_lock); 1223 /* Release the reference we implicitly held in ipnet_create_if(). */ 1224 ipnetif_refrele(ipnetif); 1225 } 1226 1227 static void 1228 ipnet_purge_addrlist(list_t *addrlist) 1229 { 1230 ipnetif_addr_t *ifa; 1231 1232 while ((ifa = list_head(addrlist)) != NULL) { 1233 list_remove(addrlist, ifa); 1234 kmem_free(ifa, sizeof (*ifa)); 1235 } 1236 } 1237 1238 static void 1239 ipnet_free_if(ipnetif_t *ipnetif) 1240 { 1241 ASSERT(ipnetif->if_refcnt == 0); 1242 1243 /* Remove IPv4/v6 address lists from the ipnetif */ 1244 ipnet_purge_addrlist(&ipnetif->if_ip4addr_list); 1245 list_destroy(&ipnetif->if_ip4addr_list); 1246 ipnet_purge_addrlist(&ipnetif->if_ip6addr_list); 1247 list_destroy(&ipnetif->if_ip6addr_list); 1248 mutex_destroy(&ipnetif->if_addr_lock); 1249 mutex_destroy(&ipnetif->if_reflock); 1250 id_free(ipnet_minor_space, getminor(ipnetif->if_dev)); 1251 kmem_free(ipnetif, sizeof (*ipnetif)); 1252 } 1253 1254 /* 1255 * Create an ipnetif_addr_t with the given logical interface id (lif) 1256 * and add it to the supplied ipnetif. The lif is the netinfo 1257 * representation of logical interface id, and we use this id to match 1258 * incoming netinfo events against our lists of addresses. 
1259 */ 1260 static void 1261 ipnet_add_ifaddr(uint64_t lif, ipnetif_t *ipnetif, net_handle_t nd) 1262 { 1263 ipnetif_addr_t *ifaddr; 1264 zoneid_t zoneid; 1265 struct sockaddr_in bcast; 1266 struct sockaddr_storage addr; 1267 net_ifaddr_t type = NA_ADDRESS; 1268 uint64_t phyif = ipnetif->if_index; 1269 1270 if (net_getlifaddr(nd, phyif, lif, 1, &type, &addr) != 0 || 1271 net_getlifzone(nd, phyif, lif, &zoneid) != 0) 1272 return; 1273 if ((ifaddr = kmem_alloc(sizeof (*ifaddr), KM_NOSLEEP)) == NULL) 1274 return; 1275 1276 ifaddr->ifa_zone = zoneid; 1277 ifaddr->ifa_id = lif; 1278 1279 switch (addr.ss_family) { 1280 case AF_INET: 1281 ifaddr->ifa_ip4addr = 1282 ((struct sockaddr_in *)&addr)->sin_addr.s_addr; 1283 /* 1284 * Try and get the broadcast address. Note that it's okay for 1285 * an interface to not have a broadcast address, so we don't 1286 * fail the entire operation if net_getlifaddr() fails here. 1287 */ 1288 type = NA_BROADCAST; 1289 if (net_getlifaddr(nd, phyif, lif, 1, &type, &bcast) == 0) 1290 ifaddr->ifa_brdaddr = bcast.sin_addr.s_addr; 1291 break; 1292 case AF_INET6: 1293 ifaddr->ifa_ip6addr = ((struct sockaddr_in6 *)&addr)->sin6_addr; 1294 break; 1295 } 1296 1297 mutex_enter(&ipnetif->if_addr_lock); 1298 list_insert_tail(addr.ss_family == AF_INET ? 1299 &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list, ifaddr); 1300 mutex_exit(&ipnetif->if_addr_lock); 1301 } 1302 1303 static void 1304 ipnet_delete_ifaddr(ipnetif_addr_t *ifaddr, ipnetif_t *ipnetif, boolean_t isv6) 1305 { 1306 mutex_enter(&ipnetif->if_addr_lock); 1307 list_remove(isv6 ? 
1308 &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list, ifaddr); 1309 mutex_exit(&ipnetif->if_addr_lock); 1310 kmem_free(ifaddr, sizeof (*ifaddr)); 1311 } 1312 1313 static void 1314 ipnet_plumb_ev(uint64_t ifindex, const char *ifname, ipnet_stack_t *ips, 1315 boolean_t isv6) 1316 { 1317 ipnetif_t *ipnetif; 1318 boolean_t refrele_needed = B_TRUE; 1319 1320 if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL) { 1321 ipnetif = ipnet_create_if(ifname, ifindex, ips); 1322 refrele_needed = B_FALSE; 1323 } 1324 if (ipnetif != NULL) { 1325 ipnetif->if_flags |= 1326 isv6 ? IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED; 1327 } 1328 1329 if (ipnetif->if_multicnt != 0) { 1330 if (ip_join_allmulti(ifindex, isv6, 1331 ips->ips_netstack->netstack_ip) == 0) { 1332 ipnetif->if_flags |= 1333 isv6 ? IPNETIF_IPV6ALLMULTI : IPNETIF_IPV4ALLMULTI; 1334 } 1335 } 1336 1337 if (refrele_needed) 1338 ipnetif_refrele(ipnetif); 1339 } 1340 1341 static void 1342 ipnet_unplumb_ev(uint64_t ifindex, ipnet_stack_t *ips, boolean_t isv6) 1343 { 1344 ipnetif_t *ipnetif; 1345 1346 if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL) 1347 return; 1348 1349 mutex_enter(&ipnetif->if_addr_lock); 1350 ipnet_purge_addrlist(isv6 ? 1351 &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list); 1352 mutex_exit(&ipnetif->if_addr_lock); 1353 1354 /* 1355 * Note that we have one ipnetif for both IPv4 and IPv6, but we receive 1356 * separate NE_UNPLUMB events for IPv4 and IPv6. We remove the ipnetif 1357 * if both IPv4 and IPv6 interfaces have been unplumbed. 1358 */ 1359 ipnetif->if_flags &= isv6 ? 
~IPNETIF_IPV6PLUMBED : ~IPNETIF_IPV4PLUMBED; 1360 if (!(ipnetif->if_flags & (IPNETIF_IPV4PLUMBED | IPNETIF_IPV6PLUMBED))) 1361 ipnet_remove_if(ipnetif, ips); 1362 ipnetif_refrele(ipnetif); 1363 } 1364 1365 static void 1366 ipnet_lifup_ev(uint64_t ifindex, uint64_t lifindex, net_handle_t nd, 1367 ipnet_stack_t *ips, boolean_t isv6) 1368 { 1369 ipnetif_t *ipnetif; 1370 ipnetif_addr_t *ifaddr; 1371 1372 if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL) 1373 return; 1374 if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL) { 1375 /* 1376 * We must have missed a NE_LIF_DOWN event. Delete this 1377 * ifaddr and re-create it. 1378 */ 1379 ipnet_delete_ifaddr(ifaddr, ipnetif, isv6); 1380 } 1381 1382 ipnet_add_ifaddr(lifindex, ipnetif, nd); 1383 ipnetif_refrele(ipnetif); 1384 } 1385 1386 static void 1387 ipnet_lifdown_ev(uint64_t ifindex, uint64_t lifindex, ipnet_stack_t *ips, 1388 boolean_t isv6) 1389 { 1390 ipnetif_t *ipnetif; 1391 ipnetif_addr_t *ifaddr; 1392 1393 if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL) 1394 return; 1395 if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL) 1396 ipnet_delete_ifaddr(ifaddr, ipnetif, isv6); 1397 ipnetif_refrele(ipnetif); 1398 /* 1399 * Make sure that open streams on this ipnetif are still allowed to 1400 * have it open. 1401 */ 1402 ipnet_if_zonecheck(ipnetif, ips); 1403 } 1404 1405 /* 1406 * This callback from the NIC event framework dispatches a taskq as the event 1407 * handlers may block. 
1408 */ 1409 /* ARGSUSED */ 1410 static int 1411 ipnet_nicevent_cb(hook_event_token_t token, hook_data_t info, void *arg) 1412 { 1413 ipnet_stack_t *ips = arg; 1414 hook_nic_event_t *hn = (hook_nic_event_t *)info; 1415 ipnet_nicevent_t *ipne; 1416 1417 if ((ipne = kmem_alloc(sizeof (ipnet_nicevent_t), KM_NOSLEEP)) == NULL) 1418 return (0); 1419 ipne->ipne_event = hn->hne_event; 1420 ipne->ipne_protocol = hn->hne_protocol; 1421 ipne->ipne_stackid = ips->ips_netstack->netstack_stackid; 1422 ipne->ipne_ifindex = hn->hne_nic; 1423 ipne->ipne_lifindex = hn->hne_lif; 1424 if (hn->hne_datalen != 0) { 1425 (void) strlcpy(ipne->ipne_ifname, hn->hne_data, 1426 sizeof (ipne->ipne_ifname)); 1427 } 1428 (void) ddi_taskq_dispatch(ipnet_nicevent_taskq, ipnet_nicevent_task, 1429 ipne, DDI_NOSLEEP); 1430 return (0); 1431 } 1432 1433 static void 1434 ipnet_nicevent_task(void *arg) 1435 { 1436 ipnet_nicevent_t *ipne = arg; 1437 netstack_t *ns; 1438 ipnet_stack_t *ips; 1439 boolean_t isv6; 1440 1441 if ((ns = netstack_find_by_stackid(ipne->ipne_stackid)) == NULL) 1442 goto done; 1443 ips = ns->netstack_ipnet; 1444 isv6 = (ipne->ipne_protocol == ips->ips_ndv6); 1445 1446 mutex_enter(&ips->ips_event_lock); 1447 switch (ipne->ipne_event) { 1448 case NE_PLUMB: 1449 ipnet_plumb_ev(ipne->ipne_ifindex, ipne->ipne_ifname, ips, 1450 isv6); 1451 break; 1452 case NE_UNPLUMB: 1453 ipnet_unplumb_ev(ipne->ipne_ifindex, ips, isv6); 1454 break; 1455 case NE_LIF_UP: 1456 ipnet_lifup_ev(ipne->ipne_ifindex, ipne->ipne_lifindex, 1457 ipne->ipne_protocol, ips, isv6); 1458 break; 1459 case NE_LIF_DOWN: 1460 ipnet_lifdown_ev(ipne->ipne_ifindex, ipne->ipne_lifindex, ips, 1461 isv6); 1462 break; 1463 default: 1464 break; 1465 } 1466 mutex_exit(&ips->ips_event_lock); 1467 done: 1468 if (ns != NULL) 1469 netstack_rele(ns); 1470 kmem_free(ipne, sizeof (ipnet_nicevent_t)); 1471 } 1472 1473 dev_t 1474 ipnet_if_getdev(char *name, zoneid_t zoneid) 1475 { 1476 netstack_t *ns; 1477 ipnet_stack_t *ips; 1478 ipnetif_t 
*ipnetif; 1479 dev_t dev = (dev_t)-1; 1480 1481 if (is_system_labeled() && zoneid != GLOBAL_ZONEID) 1482 return (dev); 1483 if ((ns = netstack_find_by_zoneid(zoneid)) == NULL) 1484 return (dev); 1485 1486 ips = ns->netstack_ipnet; 1487 mutex_enter(&ips->ips_avl_lock); 1488 if ((ipnetif = avl_find(&ips->ips_avl_by_name, name, NULL)) != NULL) { 1489 if (ipnet_if_in_zone(ipnetif, zoneid, ips)) 1490 dev = ipnetif->if_dev; 1491 } 1492 mutex_exit(&ips->ips_avl_lock); 1493 netstack_rele(ns); 1494 1495 return (dev); 1496 } 1497 1498 static ipnetif_t * 1499 ipnet_if_getby_index(uint64_t id, ipnet_stack_t *ips) 1500 { 1501 ipnetif_t *ipnetif; 1502 1503 mutex_enter(&ips->ips_avl_lock); 1504 if ((ipnetif = avl_find(&ips->ips_avl_by_index, &id, NULL)) != NULL) 1505 ipnetif_refhold(ipnetif); 1506 mutex_exit(&ips->ips_avl_lock); 1507 return (ipnetif); 1508 } 1509 1510 static ipnetif_t * 1511 ipnet_if_getby_dev(dev_t dev, ipnet_stack_t *ips) 1512 { 1513 ipnetif_t *ipnetif; 1514 avl_tree_t *tree; 1515 1516 mutex_enter(&ips->ips_avl_lock); 1517 tree = &ips->ips_avl_by_index; 1518 for (ipnetif = avl_first(tree); ipnetif != NULL; 1519 ipnetif = avl_walk(tree, ipnetif, AVL_AFTER)) { 1520 if (ipnetif->if_dev == dev) { 1521 ipnetif_refhold(ipnetif); 1522 break; 1523 } 1524 } 1525 mutex_exit(&ips->ips_avl_lock); 1526 return (ipnetif); 1527 } 1528 1529 static ipnetif_addr_t * 1530 ipnet_match_lif(ipnetif_t *ipnetif, lif_if_t lid, boolean_t isv6) 1531 { 1532 ipnetif_addr_t *ifaddr; 1533 list_t *list; 1534 1535 mutex_enter(&ipnetif->if_addr_lock); 1536 list = isv6 ? 
&ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list; 1537 for (ifaddr = list_head(list); ifaddr != NULL; 1538 ifaddr = list_next(list, ifaddr)) { 1539 if (lid == ifaddr->ifa_id) 1540 break; 1541 } 1542 mutex_exit(&ipnetif->if_addr_lock); 1543 return (ifaddr); 1544 } 1545 1546 /* ARGSUSED */ 1547 static void * 1548 ipnet_stack_init(netstackid_t stackid, netstack_t *ns) 1549 { 1550 ipnet_stack_t *ips; 1551 1552 ips = kmem_zalloc(sizeof (*ips), KM_SLEEP); 1553 ips->ips_netstack = ns; 1554 mutex_init(&ips->ips_avl_lock, NULL, MUTEX_DEFAULT, 0); 1555 avl_create(&ips->ips_avl_by_index, ipnet_if_compare_index, 1556 sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_index)); 1557 avl_create(&ips->ips_avl_by_name, ipnet_if_compare_name, 1558 sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_name)); 1559 mutex_init(&ips->ips_walkers_lock, NULL, MUTEX_DEFAULT, NULL); 1560 cv_init(&ips->ips_walkers_cv, NULL, CV_DRIVER, NULL); 1561 list_create(&ips->ips_str_list, sizeof (ipnet_t), 1562 offsetof(ipnet_t, ipnet_next)); 1563 ipnet_register_netihook(ips); 1564 return (ips); 1565 } 1566 1567 /* ARGSUSED */ 1568 static void 1569 ipnet_stack_fini(netstackid_t stackid, void *arg) 1570 { 1571 ipnet_stack_t *ips = arg; 1572 ipnetif_t *ipnetif, *nipnetif; 1573 1574 if (ips->ips_ndv4 != NULL) { 1575 VERIFY(net_hook_unregister(ips->ips_ndv4, NH_NIC_EVENTS, 1576 ips->ips_nicevents) == 0); 1577 VERIFY(net_protocol_release(ips->ips_ndv4) == 0); 1578 } 1579 if (ips->ips_ndv6 != NULL) { 1580 VERIFY(net_hook_unregister(ips->ips_ndv6, NH_NIC_EVENTS, 1581 ips->ips_nicevents) == 0); 1582 VERIFY(net_protocol_release(ips->ips_ndv6) == 0); 1583 } 1584 hook_free(ips->ips_nicevents); 1585 1586 for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL; 1587 ipnetif = nipnetif) { 1588 nipnetif = AVL_NEXT(&ips->ips_avl_by_index, ipnetif); 1589 ipnet_remove_if(ipnetif, ips); 1590 } 1591 avl_destroy(&ips->ips_avl_by_index); 1592 avl_destroy(&ips->ips_avl_by_name); 1593 
mutex_destroy(&ips->ips_avl_lock); 1594 mutex_destroy(&ips->ips_walkers_lock); 1595 cv_destroy(&ips->ips_walkers_cv); 1596 list_destroy(&ips->ips_str_list); 1597 kmem_free(ips, sizeof (*ips)); 1598 } 1599 1600 /* Do any of the addresses in addrlist belong the supplied zoneid? */ 1601 static boolean_t 1602 ipnet_addrs_in_zone(list_t *addrlist, zoneid_t zoneid) 1603 { 1604 ipnetif_addr_t *ifa; 1605 1606 for (ifa = list_head(addrlist); ifa != NULL; 1607 ifa = list_next(addrlist, ifa)) { 1608 if (ifa->ifa_zone == zoneid) 1609 return (B_TRUE); 1610 } 1611 return (B_FALSE); 1612 } 1613 1614 /* Should the supplied ipnetif be visible from the supplied zoneid? */ 1615 static boolean_t 1616 ipnet_if_in_zone(ipnetif_t *ipnetif, zoneid_t zoneid, ipnet_stack_t *ips) 1617 { 1618 int ret; 1619 1620 /* 1621 * The global zone has visibility into all interfaces in the global 1622 * stack, and exclusive stack zones have visibility into all 1623 * interfaces in their stack. 1624 */ 1625 if (zoneid == GLOBAL_ZONEID || 1626 ips->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID) 1627 return (B_TRUE); 1628 1629 /* 1630 * Shared-stack zones only have visibility for interfaces that have 1631 * addresses in their zone. 1632 */ 1633 mutex_enter(&ipnetif->if_addr_lock); 1634 ret = ipnet_addrs_in_zone(&ipnetif->if_ip4addr_list, zoneid) || 1635 ipnet_addrs_in_zone(&ipnetif->if_ip6addr_list, zoneid); 1636 mutex_exit(&ipnetif->if_addr_lock); 1637 return (ret); 1638 } 1639 1640 /* 1641 * Verify that any ipnet_t that has a reference to the supplied ipnetif should 1642 * still be allowed to have it open. A given ipnet_t may no longer be allowed 1643 * to have an ipnetif open if there are no longer any addresses that belong to 1644 * the ipnetif in the ipnet_t's non-global shared-stack zoneid. If that's the 1645 * case, send the ipnet_t an M_HANGUP. 
1646 */ 1647 static void 1648 ipnet_if_zonecheck(ipnetif_t *ipnetif, ipnet_stack_t *ips) 1649 { 1650 list_t *strlist = &ips->ips_str_list; 1651 ipnet_t *ipnet; 1652 1653 ipnet_walkers_inc(ips); 1654 for (ipnet = list_head(strlist); ipnet != NULL; 1655 ipnet = list_next(strlist, ipnet)) { 1656 if (ipnet->ipnet_if != ipnetif) 1657 continue; 1658 if (!ipnet_if_in_zone(ipnetif, ipnet->ipnet_zoneid, ips)) 1659 (void) putnextctl(ipnet->ipnet_rq, M_HANGUP); 1660 } 1661 ipnet_walkers_dec(ips); 1662 } 1663 1664 void 1665 ipnet_walk_if(ipnet_walkfunc_t *cb, void *arg, zoneid_t zoneid) 1666 { 1667 ipnetif_t *ipnetif; 1668 list_t cbdata; 1669 ipnetif_cbdata_t *cbnode; 1670 netstack_t *ns; 1671 ipnet_stack_t *ips; 1672 1673 /* 1674 * On labeled systems, non-global zones shouldn't see anything 1675 * in /dev/ipnet. 1676 */ 1677 if (is_system_labeled() && zoneid != GLOBAL_ZONEID) 1678 return; 1679 1680 if ((ns = netstack_find_by_zoneid(zoneid)) == NULL) 1681 return; 1682 1683 ips = ns->netstack_ipnet; 1684 list_create(&cbdata, sizeof (ipnetif_cbdata_t), 1685 offsetof(ipnetif_cbdata_t, ic_next)); 1686 1687 mutex_enter(&ips->ips_avl_lock); 1688 for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL; 1689 ipnetif = avl_walk(&ips->ips_avl_by_index, ipnetif, AVL_AFTER)) { 1690 if (!ipnet_if_in_zone(ipnetif, zoneid, ips)) 1691 continue; 1692 cbnode = kmem_zalloc(sizeof (ipnetif_cbdata_t), KM_SLEEP); 1693 (void) strlcpy(cbnode->ic_ifname, ipnetif->if_name, LIFNAMSIZ); 1694 cbnode->ic_dev = ipnetif->if_dev; 1695 list_insert_head(&cbdata, cbnode); 1696 } 1697 mutex_exit(&ips->ips_avl_lock); 1698 1699 while ((cbnode = list_head(&cbdata)) != NULL) { 1700 cb(cbnode->ic_ifname, arg, cbnode->ic_dev); 1701 list_remove(&cbdata, cbnode); 1702 kmem_free(cbnode, sizeof (ipnetif_cbdata_t)); 1703 } 1704 list_destroy(&cbdata); 1705 netstack_rele(ns); 1706 } 1707 1708 static int 1709 ipnet_if_compare_index(const void *index_ptr, const void *ipnetifp) 1710 { 1711 int64_t index1 = *((int64_t 
*)index_ptr); 1712 int64_t index2 = (int64_t)((ipnetif_t *)ipnetifp)->if_index; 1713 1714 return (SIGNOF(index2 - index1)); 1715 } 1716 1717 static int 1718 ipnet_if_compare_name(const void *name_ptr, const void *ipnetifp) 1719 { 1720 int res; 1721 1722 res = strcmp(((ipnetif_t *)ipnetifp)->if_name, name_ptr); 1723 return (SIGNOF(res)); 1724 } 1725 1726 static void 1727 ipnetif_refhold(ipnetif_t *ipnetif) 1728 { 1729 mutex_enter(&ipnetif->if_reflock); 1730 ipnetif->if_refcnt++; 1731 mutex_exit(&ipnetif->if_reflock); 1732 } 1733 1734 static void 1735 ipnetif_refrele(ipnetif_t *ipnetif) 1736 { 1737 mutex_enter(&ipnetif->if_reflock); 1738 ASSERT(ipnetif->if_refcnt != 0); 1739 if (--ipnetif->if_refcnt == 0) 1740 ipnet_free_if(ipnetif); 1741 else 1742 mutex_exit(&ipnetif->if_reflock); 1743 } 1744 1745 static void 1746 ipnet_walkers_inc(ipnet_stack_t *ips) 1747 { 1748 mutex_enter(&ips->ips_walkers_lock); 1749 ips->ips_walkers_cnt++; 1750 mutex_exit(&ips->ips_walkers_lock); 1751 } 1752 1753 static void 1754 ipnet_walkers_dec(ipnet_stack_t *ips) 1755 { 1756 mutex_enter(&ips->ips_walkers_lock); 1757 ASSERT(ips->ips_walkers_cnt != 0); 1758 if (--ips->ips_walkers_cnt == 0) 1759 cv_broadcast(&ips->ips_walkers_cv); 1760 mutex_exit(&ips->ips_walkers_lock); 1761 } 1762