/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * The ipnet device defined here provides access to packets at the IP layer. To
 * provide access to packets at this layer it registers a callback function in
 * the ip module and when there are open instances of the device ip will pass
 * packets into the device. Packets from ip are passed on the input, output and
 * loopback paths. Internally the module returns to ip as soon as possible by
 * deferring processing using a taskq.
 *
 * Management of the devices in /dev/ipnet/ is handled by the devname
 * filesystem and use of the neti interfaces.  This module registers for NIC
 * events using the neti framework so that when IP interfaces are brought up,
 * taken down etc. the ipnet module is notified and its view of the interfaces
 * configured on the system adjusted.  On attach, the module gets an initial
 * view of the system again using the neti framework but as it has already
 * registered for IP interface events, it is still up-to-date with any changes.
42 */ 43 44 #include <sys/types.h> 45 #include <sys/conf.h> 46 #include <sys/cred.h> 47 #include <sys/stat.h> 48 #include <sys/ddi.h> 49 #include <sys/sunddi.h> 50 #include <sys/modctl.h> 51 #include <sys/dlpi.h> 52 #include <sys/strsun.h> 53 #include <sys/id_space.h> 54 #include <sys/kmem.h> 55 #include <sys/mkdev.h> 56 #include <sys/neti.h> 57 #include <net/if.h> 58 #include <sys/errno.h> 59 #include <sys/list.h> 60 #include <sys/ksynch.h> 61 #include <sys/hook_event.h> 62 #include <sys/stropts.h> 63 #include <sys/sysmacros.h> 64 #include <inet/ip.h> 65 #include <inet/ip_multi.h> 66 #include <inet/ip6.h> 67 #include <inet/ipnet.h> 68 69 static struct module_info ipnet_minfo = { 70 1, /* mi_idnum */ 71 "ipnet", /* mi_idname */ 72 0, /* mi_minpsz */ 73 INFPSZ, /* mi_maxpsz */ 74 2048, /* mi_hiwat */ 75 0 /* mi_lowat */ 76 }; 77 78 /* 79 * List to hold static view of ipnetif_t's on the system. This is needed to 80 * avoid holding the lock protecting the avl tree of ipnetif's over the 81 * callback into the dev filesystem. 82 */ 83 typedef struct ipnetif_cbdata { 84 char ic_ifname[LIFNAMSIZ]; 85 dev_t ic_dev; 86 list_node_t ic_next; 87 } ipnetif_cbdata_t; 88 89 /* 90 * Convenience enumerated type for ipnet_accept(). It describes the 91 * properties of a given ipnet_addrp_t relative to a single ipnet_t 92 * client stream. The values represent whether the address is ... 93 */ 94 typedef enum { 95 IPNETADDR_MYADDR, /* an address on my ipnetif_t. */ 96 IPNETADDR_MBCAST, /* a multicast or broadcast address. */ 97 IPNETADDR_UNKNOWN /* none of the above. */ 98 } ipnet_addrtype_t; 99 100 /* Argument used for the ipnet_nicevent_taskq callback. 
*/ 101 typedef struct ipnet_nicevent_s { 102 nic_event_t ipne_event; 103 net_handle_t ipne_protocol; 104 netstackid_t ipne_stackid; 105 uint64_t ipne_ifindex; 106 uint64_t ipne_lifindex; 107 char ipne_ifname[LIFNAMSIZ]; 108 } ipnet_nicevent_t; 109 110 static dev_info_t *ipnet_dip; 111 static major_t ipnet_major; 112 static ddi_taskq_t *ipnet_taskq; /* taskq for packets */ 113 static ddi_taskq_t *ipnet_nicevent_taskq; /* taskq for NIC events */ 114 static id_space_t *ipnet_minor_space; 115 static const int IPNET_MINOR_LO = 1; /* minor number for /dev/lo0 */ 116 static const int IPNET_MINOR_MIN = 2; /* start of dynamic minors */ 117 static dl_info_ack_t ipnet_infoack = IPNET_INFO_ACK_INIT; 118 static ipnet_acceptfn_t ipnet_accept, ipnet_loaccept; 119 120 static void ipnet_input(mblk_t *); 121 static int ipnet_wput(queue_t *, mblk_t *); 122 static int ipnet_rsrv(queue_t *); 123 static int ipnet_open(queue_t *, dev_t *, int, int, cred_t *); 124 static int ipnet_close(queue_t *); 125 static void ipnet_ioctl(queue_t *, mblk_t *); 126 static void ipnet_iocdata(queue_t *, mblk_t *); 127 static void ipnet_wputnondata(queue_t *, mblk_t *); 128 static int ipnet_attach(dev_info_t *, ddi_attach_cmd_t); 129 static int ipnet_detach(dev_info_t *, ddi_detach_cmd_t); 130 static int ipnet_devinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); 131 static void ipnet_inforeq(queue_t *q, mblk_t *mp); 132 static void ipnet_bindreq(queue_t *q, mblk_t *mp); 133 static void ipnet_unbindreq(queue_t *q, mblk_t *mp); 134 static void ipnet_dlpromisconreq(queue_t *q, mblk_t *mp); 135 static void ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp); 136 static int ipnet_join_allmulti(ipnetif_t *, ipnet_stack_t *); 137 static void ipnet_leave_allmulti(ipnetif_t *, ipnet_stack_t *); 138 static int ipnet_nicevent_cb(hook_event_token_t, hook_data_t, void *); 139 static void ipnet_nicevent_task(void *); 140 static ipnetif_t *ipnet_create_if(const char *, uint64_t, ipnet_stack_t *); 141 static void 
ipnet_remove_if(ipnetif_t *, ipnet_stack_t *); 142 static ipnetif_addr_t *ipnet_match_lif(ipnetif_t *, lif_if_t, boolean_t); 143 static ipnetif_t *ipnet_if_getby_index(uint64_t, ipnet_stack_t *); 144 static ipnetif_t *ipnet_if_getby_dev(dev_t, ipnet_stack_t *); 145 static boolean_t ipnet_if_in_zone(ipnetif_t *, zoneid_t, ipnet_stack_t *); 146 static void ipnet_if_zonecheck(ipnetif_t *, ipnet_stack_t *); 147 static int ipnet_populate_if(net_handle_t, ipnet_stack_t *, boolean_t); 148 static int ipnet_if_compare_name(const void *, const void *); 149 static int ipnet_if_compare_index(const void *, const void *); 150 static void ipnet_add_ifaddr(uint64_t, ipnetif_t *, net_handle_t); 151 static void ipnet_delete_ifaddr(ipnetif_addr_t *, ipnetif_t *, boolean_t); 152 static void ipnetif_refhold(ipnetif_t *); 153 static void ipnetif_refrele(ipnetif_t *); 154 static void ipnet_walkers_inc(ipnet_stack_t *); 155 static void ipnet_walkers_dec(ipnet_stack_t *); 156 static void ipnet_register_netihook(ipnet_stack_t *); 157 static void *ipnet_stack_init(netstackid_t, netstack_t *); 158 static void ipnet_stack_fini(netstackid_t, void *); 159 160 static struct qinit ipnet_rinit = { 161 NULL, /* qi_putp */ 162 ipnet_rsrv, /* qi_srvp */ 163 ipnet_open, /* qi_qopen */ 164 ipnet_close, /* qi_qclose */ 165 NULL, /* qi_qadmin */ 166 &ipnet_minfo, /* qi_minfo */ 167 }; 168 169 static struct qinit ipnet_winit = { 170 ipnet_wput, /* qi_putp */ 171 NULL, /* qi_srvp */ 172 NULL, /* qi_qopen */ 173 NULL, /* qi_qclose */ 174 NULL, /* qi_qadmin */ 175 &ipnet_minfo, /* qi_minfo */ 176 }; 177 178 static struct streamtab ipnet_info = { 179 &ipnet_rinit, &ipnet_winit 180 }; 181 182 DDI_DEFINE_STREAM_OPS(ipnet_ops, nulldev, nulldev, ipnet_attach, 183 ipnet_detach, nodev, ipnet_devinfo, D_MP | D_MTPERMOD, &ipnet_info, 184 ddi_quiesce_not_supported); 185 186 static struct modldrv modldrv = { 187 &mod_driverops, 188 "STREAMS ipnet driver", 189 &ipnet_ops 190 }; 191 192 static struct modlinkage modlinkage 
= { 193 MODREV_1, &modldrv, NULL 194 }; 195 196 /* 197 * Walk the list of physical interfaces on the machine, for each 198 * interface create a new ipnetif_t and add any addresses to it. We 199 * need to do the walk twice, once for IPv4 and once for IPv6. 200 * 201 * The interfaces are destroyed as part of ipnet_stack_fini() for each 202 * stack. Note that we cannot do this initialization in 203 * ipnet_stack_init(), since ipnet_stack_init() cannot fail. 204 */ 205 static int 206 ipnet_if_init(void) 207 { 208 netstack_handle_t nh; 209 netstack_t *ns; 210 ipnet_stack_t *ips; 211 int ret = 0; 212 213 netstack_next_init(&nh); 214 while ((ns = netstack_next(&nh)) != NULL) { 215 ips = ns->netstack_ipnet; 216 if ((ret = ipnet_populate_if(ips->ips_ndv4, ips, B_FALSE)) != 0) 217 break; 218 if ((ret = ipnet_populate_if(ips->ips_ndv6, ips, B_TRUE)) != 0) 219 break; 220 } 221 netstack_next_fini(&nh); 222 return (ret); 223 } 224 225 /* 226 * Standard module entry points. 227 */ 228 int 229 _init(void) 230 { 231 int ret; 232 233 if ((ipnet_major = ddi_name_to_major("ipnet")) == (major_t)-1) 234 return (ENODEV); 235 ipnet_minor_space = id_space_create("ipnet_minor_space", 236 IPNET_MINOR_MIN, MAXMIN32); 237 netstack_register(NS_IPNET, ipnet_stack_init, NULL, ipnet_stack_fini); 238 /* 239 * We call ddi_taskq_create() with nthread == 1 to ensure in-order 240 * delivery of packets to clients. 
241 */ 242 ipnet_taskq = ddi_taskq_create(NULL, "ipnet", 1, TASKQ_DEFAULTPRI, 0); 243 ipnet_nicevent_taskq = ddi_taskq_create(NULL, "ipnet_nic_event_queue", 244 1, TASKQ_DEFAULTPRI, 0); 245 if (ipnet_taskq == NULL || ipnet_nicevent_taskq == NULL) { 246 ret = ENOMEM; 247 goto done; 248 } 249 if ((ret = ipnet_if_init()) == 0) 250 ret = mod_install(&modlinkage); 251 done: 252 if (ret != 0) { 253 if (ipnet_taskq != NULL) 254 ddi_taskq_destroy(ipnet_taskq); 255 if (ipnet_nicevent_taskq != NULL) 256 ddi_taskq_destroy(ipnet_nicevent_taskq); 257 netstack_unregister(NS_IPNET); 258 id_space_destroy(ipnet_minor_space); 259 } 260 return (ret); 261 } 262 263 int 264 _fini(void) 265 { 266 int err; 267 268 if ((err = mod_remove(&modlinkage)) != 0) 269 return (err); 270 ddi_taskq_destroy(ipnet_nicevent_taskq); 271 ddi_taskq_destroy(ipnet_taskq); 272 netstack_unregister(NS_IPNET); 273 id_space_destroy(ipnet_minor_space); 274 return (0); 275 } 276 277 int 278 _info(struct modinfo *modinfop) 279 { 280 return (mod_info(&modlinkage, modinfop)); 281 } 282 283 static void 284 ipnet_register_netihook(ipnet_stack_t *ips) 285 { 286 int ret; 287 netstackid_t stackid = ips->ips_netstack->netstack_stackid; 288 289 HOOK_INIT(ips->ips_nicevents, ipnet_nicevent_cb, "ipnet_nicevents", 290 ips); 291 292 /* 293 * The ipnet device depends on ip and is registered in the netstack 294 * framework after ip so the call to net_lookup_impl() cannot fail. 
295 */ 296 ips->ips_ndv4 = net_protocol_lookup(stackid, NHF_INET); 297 ips->ips_ndv6 = net_protocol_lookup(stackid, NHF_INET6); 298 299 ret = net_hook_register(ips->ips_ndv4, NH_NIC_EVENTS, 300 ips->ips_nicevents); 301 if (ret != 0) { 302 cmn_err(CE_WARN, "ipnet_register_netihook: net_register_hook() " 303 "failed for v4 stack instance %d: %d", stackid, ret); 304 } 305 ret = net_hook_register(ips->ips_ndv6, NH_NIC_EVENTS, 306 ips->ips_nicevents); 307 if (ret != 0) { 308 cmn_err(CE_WARN, "ipnet_register_netihook: net_register_hook() " 309 "failed for v6 stack instance %d: %d", stackid, ret); 310 } 311 } 312 313 /* 314 * This function is called on attach to build an initial view of the 315 * interfaces on the system. It will be called once for IPv4 and once 316 * for IPv6, although there is only one ipnet interface for both IPv4 317 * and IPv6 there are separate address lists. 318 */ 319 static int 320 ipnet_populate_if(net_handle_t nd, ipnet_stack_t *ips, boolean_t isv6) 321 { 322 phy_if_t phyif; 323 lif_if_t lif; 324 ipnetif_t *ipnetif; 325 char name[LIFNAMSIZ]; 326 boolean_t new_if = B_FALSE; 327 uint64_t ifflags; 328 int ret = 0; 329 330 /* 331 * Make sure we're not processing NIC events during the 332 * population of our interfaces and address lists. 333 */ 334 mutex_enter(&ips->ips_event_lock); 335 336 for (phyif = net_phygetnext(nd, 0); phyif != 0; 337 phyif = net_phygetnext(nd, phyif)) { 338 if (net_getifname(nd, phyif, name, LIFNAMSIZ) != 0) 339 continue; 340 if ((ipnetif = ipnet_if_getby_index(phyif, ips)) == NULL) { 341 ipnetif = ipnet_create_if(name, phyif, ips); 342 if (ipnetif == NULL) { 343 ret = ENOMEM; 344 goto done; 345 } 346 new_if = B_TRUE; 347 } 348 ipnetif->if_flags |= 349 isv6 ? IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED; 350 351 for (lif = net_lifgetnext(nd, phyif, 0); lif != 0; 352 lif = net_lifgetnext(nd, phyif, lif)) { 353 /* 354 * Skip addresses that aren't up. We'll add 355 * them when we receive an NE_LIF_UP event. 
356 */ 357 if (net_getlifflags(nd, phyif, lif, &ifflags) != 0 || 358 !(ifflags & IFF_UP)) 359 continue; 360 /* Don't add it if we already have it. */ 361 if (ipnet_match_lif(ipnetif, lif, isv6) != NULL) 362 continue; 363 ipnet_add_ifaddr(lif, ipnetif, nd); 364 } 365 if (!new_if) 366 ipnetif_refrele(ipnetif); 367 } 368 369 done: 370 mutex_exit(&ips->ips_event_lock); 371 return (ret); 372 } 373 374 static int 375 ipnet_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 376 { 377 if (cmd != DDI_ATTACH) 378 return (DDI_FAILURE); 379 380 if (ddi_create_minor_node(dip, "lo0", S_IFCHR, IPNET_MINOR_LO, 381 DDI_PSEUDO, 0) == DDI_FAILURE) 382 return (DDI_FAILURE); 383 384 ipnet_dip = dip; 385 return (DDI_SUCCESS); 386 } 387 388 static int 389 ipnet_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 390 { 391 if (cmd != DDI_DETACH) 392 return (DDI_FAILURE); 393 394 ASSERT(dip == ipnet_dip); 395 ddi_remove_minor_node(ipnet_dip, NULL); 396 ipnet_dip = NULL; 397 return (DDI_SUCCESS); 398 } 399 400 /* ARGSUSED */ 401 static int 402 ipnet_devinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) 403 { 404 int error = DDI_FAILURE; 405 406 switch (infocmd) { 407 case DDI_INFO_DEVT2INSTANCE: 408 *result = (void *)0; 409 error = DDI_SUCCESS; 410 break; 411 case DDI_INFO_DEVT2DEVINFO: 412 if (ipnet_dip != NULL) { 413 *result = ipnet_dip; 414 error = DDI_SUCCESS; 415 } 416 break; 417 } 418 return (error); 419 } 420 421 /* ARGSUSED */ 422 static int 423 ipnet_open(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp) 424 { 425 ipnet_t *ipnet; 426 netstack_t *ns = NULL; 427 ipnet_stack_t *ips; 428 int err = 0; 429 zoneid_t zoneid = crgetzoneid(crp); 430 431 /* 432 * If the system is labeled, only the global zone is allowed to open 433 * IP observability nodes. 
434 */ 435 if (is_system_labeled() && zoneid != GLOBAL_ZONEID) 436 return (EACCES); 437 438 /* We don't support open as a module */ 439 if (sflag & MODOPEN) 440 return (ENOTSUP); 441 442 /* This driver is self-cloning, we don't support re-open. */ 443 if (rq->q_ptr != NULL) 444 return (EBUSY); 445 446 if ((ipnet = kmem_zalloc(sizeof (*ipnet), KM_NOSLEEP)) == NULL) 447 return (ENOMEM); 448 449 VERIFY((ns = netstack_find_by_cred(crp)) != NULL); 450 ips = ns->netstack_ipnet; 451 452 rq->q_ptr = WR(rq)->q_ptr = ipnet; 453 ipnet->ipnet_rq = rq; 454 ipnet->ipnet_minor = (minor_t)id_alloc(ipnet_minor_space); 455 ipnet->ipnet_zoneid = zoneid; 456 ipnet->ipnet_dlstate = DL_UNBOUND; 457 ipnet->ipnet_sap = 0; 458 ipnet->ipnet_ns = ns; 459 460 /* 461 * We need to hold ips_event_lock here as any NE_LIF_DOWN events need 462 * to be processed after ipnet_if is set and the ipnet_t has been 463 * inserted in the ips_str_list. 464 */ 465 mutex_enter(&ips->ips_event_lock); 466 if (getminor(*dev) == IPNET_MINOR_LO) { 467 ipnet->ipnet_flags |= IPNET_LOMODE; 468 ipnet->ipnet_acceptfn = ipnet_loaccept; 469 } else { 470 ipnet->ipnet_acceptfn = ipnet_accept; 471 ipnet->ipnet_if = ipnet_if_getby_dev(*dev, ips); 472 if (ipnet->ipnet_if == NULL || 473 !ipnet_if_in_zone(ipnet->ipnet_if, zoneid, ips)) { 474 err = ENODEV; 475 goto done; 476 } 477 } 478 479 mutex_enter(&ips->ips_walkers_lock); 480 while (ips->ips_walkers_cnt != 0) 481 cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock); 482 list_insert_head(&ips->ips_str_list, ipnet); 483 *dev = makedevice(getmajor(*dev), ipnet->ipnet_minor); 484 qprocson(rq); 485 486 /* 487 * Only register our callback if we're the first open client; we call 488 * unregister in close() for the last open client. 
489 */ 490 if (list_head(&ips->ips_str_list) == list_tail(&ips->ips_str_list)) 491 ipobs_register_hook(ns, ipnet_input); 492 mutex_exit(&ips->ips_walkers_lock); 493 494 done: 495 mutex_exit(&ips->ips_event_lock); 496 if (err != 0) { 497 netstack_rele(ns); 498 id_free(ipnet_minor_space, ipnet->ipnet_minor); 499 if (ipnet->ipnet_if != NULL) 500 ipnetif_refrele(ipnet->ipnet_if); 501 kmem_free(ipnet, sizeof (*ipnet)); 502 } 503 return (err); 504 } 505 506 static int 507 ipnet_close(queue_t *rq) 508 { 509 ipnet_t *ipnet = rq->q_ptr; 510 ipnet_stack_t *ips = ipnet->ipnet_ns->netstack_ipnet; 511 512 if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS) 513 ipnet_leave_allmulti(ipnet->ipnet_if, ips); 514 if (ipnet->ipnet_flags & IPNET_PROMISC_MULTI) 515 ipnet_leave_allmulti(ipnet->ipnet_if, ips); 516 517 mutex_enter(&ips->ips_walkers_lock); 518 while (ips->ips_walkers_cnt != 0) 519 cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock); 520 521 qprocsoff(rq); 522 523 list_remove(&ips->ips_str_list, ipnet); 524 if (ipnet->ipnet_if != NULL) 525 ipnetif_refrele(ipnet->ipnet_if); 526 id_free(ipnet_minor_space, ipnet->ipnet_minor); 527 kmem_free(ipnet, sizeof (*ipnet)); 528 529 if (list_is_empty(&ips->ips_str_list)) 530 ipobs_unregister_hook(ips->ips_netstack, ipnet_input); 531 532 mutex_exit(&ips->ips_walkers_lock); 533 netstack_rele(ips->ips_netstack); 534 return (0); 535 } 536 537 static int 538 ipnet_wput(queue_t *q, mblk_t *mp) 539 { 540 switch (mp->b_datap->db_type) { 541 case M_FLUSH: 542 if (*mp->b_rptr & FLUSHW) { 543 flushq(q, FLUSHDATA); 544 *mp->b_rptr &= ~FLUSHW; 545 } 546 if (*mp->b_rptr & FLUSHR) 547 qreply(q, mp); 548 else 549 freemsg(mp); 550 break; 551 case M_PROTO: 552 case M_PCPROTO: 553 ipnet_wputnondata(q, mp); 554 break; 555 case M_IOCTL: 556 ipnet_ioctl(q, mp); 557 break; 558 case M_IOCDATA: 559 ipnet_iocdata(q, mp); 560 break; 561 default: 562 freemsg(mp); 563 break; 564 } 565 return (0); 566 } 567 568 static int 569 ipnet_rsrv(queue_t *q) 570 { 571 mblk_t *mp; 
572 573 while ((mp = getq(q)) != NULL) { 574 ASSERT(DB_TYPE(mp) == M_DATA); 575 if (canputnext(q)) { 576 putnext(q, mp); 577 } else { 578 (void) putbq(q, mp); 579 break; 580 } 581 } 582 return (0); 583 } 584 585 static void 586 ipnet_ioctl(queue_t *q, mblk_t *mp) 587 { 588 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 589 590 switch (iocp->ioc_cmd) { 591 case DLIOCRAW: 592 miocack(q, mp, 0, 0); 593 break; 594 case DLIOCIPNETINFO: 595 if (iocp->ioc_count == TRANSPARENT) { 596 mcopyin(mp, NULL, sizeof (uint_t), NULL); 597 qreply(q, mp); 598 break; 599 } 600 /* Fallthrough, we don't support I_STR with DLIOCIPNETINFO. */ 601 default: 602 miocnak(q, mp, 0, EINVAL); 603 break; 604 } 605 } 606 607 static void 608 ipnet_iocdata(queue_t *q, mblk_t *mp) 609 { 610 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 611 ipnet_t *ipnet = q->q_ptr; 612 613 switch (iocp->ioc_cmd) { 614 case DLIOCIPNETINFO: 615 if (*(int *)mp->b_cont->b_rptr == 1) 616 ipnet->ipnet_flags |= IPNET_INFO; 617 else if (*(int *)mp->b_cont->b_rptr == 0) 618 ipnet->ipnet_flags &= ~IPNET_INFO; 619 else 620 goto iocnak; 621 miocack(q, mp, 0, DL_IPNETINFO_VERSION); 622 break; 623 default: 624 iocnak: 625 miocnak(q, mp, 0, EINVAL); 626 break; 627 } 628 } 629 630 static void 631 ipnet_wputnondata(queue_t *q, mblk_t *mp) 632 { 633 union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr; 634 t_uscalar_t prim = dlp->dl_primitive; 635 636 switch (prim) { 637 case DL_INFO_REQ: 638 ipnet_inforeq(q, mp); 639 break; 640 case DL_UNBIND_REQ: 641 ipnet_unbindreq(q, mp); 642 break; 643 case DL_BIND_REQ: 644 ipnet_bindreq(q, mp); 645 break; 646 case DL_PROMISCON_REQ: 647 ipnet_dlpromisconreq(q, mp); 648 break; 649 case DL_PROMISCOFF_REQ: 650 ipnet_dlpromiscoffreq(q, mp); 651 break; 652 case DL_UNITDATA_REQ: 653 case DL_DETACH_REQ: 654 case DL_PHYS_ADDR_REQ: 655 case DL_SET_PHYS_ADDR_REQ: 656 case DL_ENABMULTI_REQ: 657 case DL_DISABMULTI_REQ: 658 case DL_ATTACH_REQ: 659 dlerrorack(q, mp, prim, DL_UNSUPPORTED, 0); 
660 break; 661 default: 662 dlerrorack(q, mp, prim, DL_BADPRIM, 0); 663 break; 664 } 665 } 666 667 static void 668 ipnet_inforeq(queue_t *q, mblk_t *mp) 669 { 670 dl_info_ack_t *dlip; 671 size_t size = sizeof (dl_info_ack_t) + sizeof (ushort_t); 672 673 if (MBLKL(mp) < DL_INFO_REQ_SIZE) { 674 dlerrorack(q, mp, DL_INFO_REQ, DL_BADPRIM, 0); 675 return; 676 } 677 678 if ((mp = mexchange(q, mp, size, M_PCPROTO, DL_INFO_ACK)) == NULL) 679 return; 680 681 dlip = (dl_info_ack_t *)mp->b_rptr; 682 *dlip = ipnet_infoack; 683 qreply(q, mp); 684 } 685 686 static void 687 ipnet_bindreq(queue_t *q, mblk_t *mp) 688 { 689 union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr; 690 int32_t sap; 691 ipnet_t *ipnet = q->q_ptr; 692 693 if (MBLKL(mp) < DL_BIND_REQ_SIZE) { 694 dlerrorack(q, mp, DL_BIND_REQ, DL_BADPRIM, 0); 695 return; 696 } 697 698 sap = dlp->bind_req.dl_sap; 699 if (sap != IPV4_VERSION && sap != IPV6_VERSION && sap != 0) { 700 dlerrorack(q, mp, DL_BIND_REQ, DL_BADSAP, 0); 701 } else { 702 ipnet->ipnet_sap = sap; 703 ipnet->ipnet_dlstate = DL_IDLE; 704 dlbindack(q, mp, sap, 0, 0, 0, 0); 705 } 706 } 707 708 static void 709 ipnet_unbindreq(queue_t *q, mblk_t *mp) 710 { 711 ipnet_t *ipnet = q->q_ptr; 712 713 if (MBLKL(mp) < DL_UNBIND_REQ_SIZE) { 714 dlerrorack(q, mp, DL_UNBIND_REQ, DL_BADPRIM, 0); 715 return; 716 } 717 718 if (ipnet->ipnet_dlstate != DL_IDLE) { 719 dlerrorack(q, mp, DL_UNBIND_REQ, DL_OUTSTATE, 0); 720 } else { 721 ipnet->ipnet_dlstate = DL_UNBOUND; 722 ipnet->ipnet_sap = 0; 723 dlokack(q, mp, DL_UNBIND_REQ); 724 } 725 } 726 727 static void 728 ipnet_dlpromisconreq(queue_t *q, mblk_t *mp) 729 { 730 ipnet_t *ipnet = q->q_ptr; 731 t_uscalar_t level; 732 int err; 733 734 if (MBLKL(mp) < DL_PROMISCON_REQ_SIZE) { 735 dlerrorack(q, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0); 736 return; 737 } 738 739 if (ipnet->ipnet_flags & IPNET_LOMODE) { 740 dlokack(q, mp, DL_PROMISCON_REQ); 741 return; 742 } 743 744 level = ((dl_promiscon_req_t *)mp->b_rptr)->dl_level; 745 if 
(level == DL_PROMISC_PHYS || level == DL_PROMISC_MULTI) { 746 if ((err = ipnet_join_allmulti(ipnet->ipnet_if, 747 ipnet->ipnet_ns->netstack_ipnet)) != 0) { 748 dlerrorack(q, mp, DL_PROMISCON_REQ, DL_SYSERR, err); 749 return; 750 } 751 } 752 753 switch (level) { 754 case DL_PROMISC_PHYS: 755 ipnet->ipnet_flags |= IPNET_PROMISC_PHYS; 756 break; 757 case DL_PROMISC_SAP: 758 ipnet->ipnet_flags |= IPNET_PROMISC_SAP; 759 break; 760 case DL_PROMISC_MULTI: 761 ipnet->ipnet_flags |= IPNET_PROMISC_MULTI; 762 break; 763 default: 764 dlerrorack(q, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0); 765 return; 766 } 767 768 dlokack(q, mp, DL_PROMISCON_REQ); 769 } 770 771 static void 772 ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp) 773 { 774 ipnet_t *ipnet = q->q_ptr; 775 t_uscalar_t level; 776 uint16_t orig_ipnet_flags = ipnet->ipnet_flags; 777 778 if (MBLKL(mp) < DL_PROMISCOFF_REQ_SIZE) { 779 dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0); 780 return; 781 } 782 783 if (ipnet->ipnet_flags & IPNET_LOMODE) { 784 dlokack(q, mp, DL_PROMISCOFF_REQ); 785 return; 786 } 787 788 level = ((dl_promiscon_req_t *)mp->b_rptr)->dl_level; 789 switch (level) { 790 case DL_PROMISC_PHYS: 791 if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS) 792 ipnet->ipnet_flags &= ~IPNET_PROMISC_PHYS; 793 break; 794 case DL_PROMISC_SAP: 795 if (ipnet->ipnet_flags & IPNET_PROMISC_SAP) 796 ipnet->ipnet_flags &= ~IPNET_PROMISC_SAP; 797 break; 798 case DL_PROMISC_MULTI: 799 if (ipnet->ipnet_flags & IPNET_PROMISC_MULTI) 800 ipnet->ipnet_flags &= ~IPNET_PROMISC_MULTI; 801 break; 802 default: 803 dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0); 804 return; 805 } 806 807 if (orig_ipnet_flags == ipnet->ipnet_flags) { 808 dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_NOTENAB, 0); 809 return; 810 } 811 812 if (level == DL_PROMISC_PHYS || level == DL_PROMISC_MULTI) { 813 ipnet_leave_allmulti(ipnet->ipnet_if, 814 ipnet->ipnet_ns->netstack_ipnet); 815 } 816 817 dlokack(q, mp, DL_PROMISCOFF_REQ); 818 } 819 820 static int 821 
ipnet_join_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips) 822 { 823 int err = 0; 824 ip_stack_t *ipst = ips->ips_netstack->netstack_ip; 825 uint64_t index = ipnetif->if_index; 826 827 mutex_enter(&ips->ips_event_lock); 828 if (ipnetif->if_multicnt == 0) { 829 ASSERT((ipnetif->if_flags & 830 (IPNETIF_IPV4ALLMULTI | IPNETIF_IPV6ALLMULTI)) == 0); 831 if (ipnetif->if_flags & IPNETIF_IPV4PLUMBED) { 832 err = ip_join_allmulti(index, B_FALSE, ipst); 833 if (err != 0) 834 goto done; 835 ipnetif->if_flags |= IPNETIF_IPV4ALLMULTI; 836 } 837 if (ipnetif->if_flags & IPNETIF_IPV6PLUMBED) { 838 err = ip_join_allmulti(index, B_TRUE, ipst); 839 if (err != 0 && 840 (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI)) { 841 (void) ip_leave_allmulti(index, B_FALSE, ipst); 842 ipnetif->if_flags &= ~IPNETIF_IPV4ALLMULTI; 843 goto done; 844 } 845 ipnetif->if_flags |= IPNETIF_IPV6ALLMULTI; 846 } 847 } 848 ipnetif->if_multicnt++; 849 850 done: 851 mutex_exit(&ips->ips_event_lock); 852 return (err); 853 } 854 855 static void 856 ipnet_leave_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips) 857 { 858 int err; 859 ip_stack_t *ipst = ips->ips_netstack->netstack_ip; 860 uint64_t index = ipnetif->if_index; 861 862 mutex_enter(&ips->ips_event_lock); 863 ASSERT(ipnetif->if_multicnt != 0); 864 if (--ipnetif->if_multicnt == 0) { 865 if (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI) { 866 err = ip_leave_allmulti(index, B_FALSE, ipst); 867 ASSERT(err == 0 || err == ENODEV); 868 ipnetif->if_flags &= ~IPNETIF_IPV4ALLMULTI; 869 } 870 if (ipnetif->if_flags & IPNETIF_IPV6ALLMULTI) { 871 err = ip_leave_allmulti(index, B_TRUE, ipst); 872 ASSERT(err == 0 || err == ENODEV); 873 ipnetif->if_flags &= ~IPNETIF_IPV6ALLMULTI; 874 } 875 } 876 mutex_exit(&ips->ips_event_lock); 877 } 878 879 static mblk_t * 880 ipnet_addheader(ipobs_hook_data_t *ihd, mblk_t *mp) 881 { 882 mblk_t *dlhdr; 883 dl_ipnetinfo_t *dl; 884 885 if ((dlhdr = allocb(sizeof (dl_ipnetinfo_t), BPRI_HI)) == NULL) { 886 freemsg(mp); 887 return (NULL); 888 } 889 
dl = (dl_ipnetinfo_t *)dlhdr->b_rptr; 890 dl->dli_version = DL_IPNETINFO_VERSION; 891 dl->dli_len = htons(sizeof (*dl)); 892 dl->dli_ipver = ihd->ihd_ipver; 893 dl->dli_srczone = BE_64((uint64_t)ihd->ihd_zsrc); 894 dl->dli_dstzone = BE_64((uint64_t)ihd->ihd_zdst); 895 dlhdr->b_wptr += sizeof (*dl); 896 dlhdr->b_cont = mp; 897 898 return (dlhdr); 899 } 900 901 static ipnet_addrtype_t 902 ipnet_get_addrtype(ipnet_t *ipnet, ipnet_addrp_t *addr) 903 { 904 list_t *list; 905 ipnetif_t *ipnetif = ipnet->ipnet_if; 906 ipnetif_addr_t *ifaddr; 907 ipnet_addrtype_t addrtype = IPNETADDR_UNKNOWN; 908 909 /* First check if the address is multicast or limited broadcast. */ 910 switch (addr->iap_family) { 911 case AF_INET: 912 if (CLASSD(*(addr->iap_addr4)) || 913 *(addr->iap_addr4) == INADDR_BROADCAST) 914 return (IPNETADDR_MBCAST); 915 break; 916 case AF_INET6: 917 if (IN6_IS_ADDR_MULTICAST(addr->iap_addr6)) 918 return (IPNETADDR_MBCAST); 919 break; 920 } 921 922 /* 923 * Walk the address list to see if the address belongs to our 924 * interface or is one of our subnet broadcast addresses. 925 */ 926 mutex_enter(&ipnetif->if_addr_lock); 927 list = (addr->iap_family == AF_INET) ? 928 &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list; 929 for (ifaddr = list_head(list); 930 ifaddr != NULL && addrtype == IPNETADDR_UNKNOWN; 931 ifaddr = list_next(list, ifaddr)) { 932 /* 933 * If we're not in the global zone, then only look at 934 * addresses in our zone. 
935 */ 936 if (ipnet->ipnet_zoneid != GLOBAL_ZONEID && 937 ipnet->ipnet_zoneid != ifaddr->ifa_zone) 938 continue; 939 switch (addr->iap_family) { 940 case AF_INET: 941 if (ifaddr->ifa_ip4addr != INADDR_ANY && 942 *(addr->iap_addr4) == ifaddr->ifa_ip4addr) 943 addrtype = IPNETADDR_MYADDR; 944 else if (ifaddr->ifa_brdaddr != INADDR_ANY && 945 *(addr->iap_addr4) == ifaddr->ifa_brdaddr) 946 addrtype = IPNETADDR_MBCAST; 947 break; 948 case AF_INET6: 949 if (IN6_ARE_ADDR_EQUAL(addr->iap_addr6, 950 &ifaddr->ifa_ip6addr)) 951 addrtype = IPNETADDR_MYADDR; 952 break; 953 } 954 } 955 mutex_exit(&ipnetif->if_addr_lock); 956 957 return (addrtype); 958 } 959 960 /* 961 * Verify if the packet contained in ihd should be passed up to the 962 * ipnet client stream. 963 */ 964 static boolean_t 965 ipnet_accept(ipnet_t *ipnet, ipobs_hook_data_t *ihd, ipnet_addrp_t *src, 966 ipnet_addrp_t *dst) 967 { 968 uint64_t ifindex = ipnet->ipnet_if->if_index; 969 ipnet_addrtype_t srctype, dsttype; 970 971 srctype = ipnet_get_addrtype(ipnet, src); 972 dsttype = ipnet_get_addrtype(ipnet, dst); 973 974 /* 975 * Do not allow an ipnet stream to see packets that are not from or to 976 * its zone. The exception is when zones are using the shared stack 977 * model. In this case, streams in the global zone have visibility 978 * into other shared-stack zones, and broadcast and multicast traffic 979 * is visible by all zones in the stack. 980 */ 981 if (ipnet->ipnet_zoneid != GLOBAL_ZONEID && 982 dsttype != IPNETADDR_MBCAST) { 983 if (ipnet->ipnet_zoneid != ihd->ihd_zsrc && 984 ipnet->ipnet_zoneid != ihd->ihd_zdst) 985 return (B_FALSE); 986 } 987 988 /* 989 * If DL_PROMISC_SAP isn't enabled, then the bound SAP must match the 990 * packet's IP version. 991 */ 992 if (!(ipnet->ipnet_flags & IPNET_PROMISC_SAP) && 993 ipnet->ipnet_sap != ihd->ihd_ipver) 994 return (B_FALSE); 995 996 /* If the destination address is ours, then accept the packet. 
	 */
	if (dsttype == IPNETADDR_MYADDR)
		return (B_TRUE);

	/*
	 * If DL_PROMISC_PHYS is enabled, then we can see all packets that are
	 * sent or received on the interface we're observing, or packets that
	 * have our source address (this allows us to see packets we send).
	 */
	if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS) {
		if (ihd->ihd_ifindex == ifindex || srctype == IPNETADDR_MYADDR)
			return (B_TRUE);
	}

	/*
	 * We accept multicast and broadcast packets transmitted or received
	 * on the interface we're observing.
	 */
	if (dsttype == IPNETADDR_MBCAST && ihd->ihd_ifindex == ifindex)
		return (B_TRUE);

	return (B_FALSE);
}

/*
 * Verify if the packet contained in ihd should be passed up to the ipnet
 * client stream that's in IPNET_LOMODE.
 *
 * Only IPOBS_HOOK_LOCAL packets are eligible.  A stream outside the global
 * zone additionally requires that its zone be the packet's source or
 * destination zone.  Finally the stream's SAP must be 0 or equal to the
 * packet's IP version.  The src and dst arguments are not used.
 */
/* ARGSUSED */
static boolean_t
ipnet_loaccept(ipnet_t *ipnet, ipobs_hook_data_t *ihd, ipnet_addrp_t *src,
    ipnet_addrp_t *dst)
{
	if (ihd->ihd_htype != IPOBS_HOOK_LOCAL)
		return (B_FALSE);

	/*
	 * An ipnet stream must not see packets that are not from/to its zone.
	 */
	if (ipnet->ipnet_zoneid != GLOBAL_ZONEID) {
		if (ipnet->ipnet_zoneid != ihd->ihd_zsrc &&
		    ipnet->ipnet_zoneid != ihd->ihd_zdst)
			return (B_FALSE);
	}

	return (ipnet->ipnet_sap == 0 || ipnet->ipnet_sap == ihd->ihd_ipver);
}

/*
 * Taskq callback (see ipnet_input()) that delivers one observed packet to
 * every open ipnet stream whose accept function takes it.
 *
 * arg is the mblk whose data is the ipobs_hook_data_t describing the packet;
 * the packet itself is ihd->ihd_mp.  Both messages are consumed: whatever is
 * left of ihd->ihd_mp, and mp itself, are freed before returning.
 */
static void
ipnet_dispatch(void *arg)
{
	mblk_t			*mp = arg;
	ipobs_hook_data_t	*ihd = (ipobs_hook_data_t *)mp->b_rptr;
	ipnet_t			*ipnet;
	mblk_t			*netmp;
	list_t			*list;
	ipnet_stack_t		*ips = ihd->ihd_stack->netstack_ipnet;
	ipnet_addrp_t		src, dst;

	/* Point src/dst at the addresses inside the packet's IP header. */
	if (ihd->ihd_ipver == IPV4_VERSION) {
		src.iap_family = dst.iap_family = AF_INET;
		src.iap_addr4 = &((ipha_t *)(ihd->ihd_mp->b_rptr))->ipha_src;
		dst.iap_addr4 = &((ipha_t *)(ihd->ihd_mp->b_rptr))->ipha_dst;
	} else {
		src.iap_family = dst.iap_family = AF_INET6;
		src.iap_addr6 = &((ip6_t *)(ihd->ihd_mp->b_rptr))->ip6_src;
		dst.iap_addr6 = &((ip6_t *)(ihd->ihd_mp->b_rptr))->ip6_dst;
	}

	/* The stream list is walked between walkers_inc and walkers_dec. */
	ipnet_walkers_inc(ips);

	list = &ips->ips_str_list;
	for (ipnet = list_head(list); ipnet != NULL;
	    ipnet = list_next(list, ipnet)) {
		if (!(*ipnet->ipnet_acceptfn)(ipnet, ihd, &src, &dst))
			continue;

		if (list_next(list, ipnet) == NULL) {
			/*
			 * Last stream on the list: hand over the original
			 * message rather than a copy.  ihd_mp is cleared so
			 * that the freemsg() at the end of this function has
			 * nothing left to free.
			 */
			netmp = ihd->ihd_mp;
			ihd->ihd_mp = NULL;
		} else {
			/* Try dupmsg() first; fall back to copymsg(). */
			if ((netmp = dupmsg(ihd->ihd_mp)) == NULL &&
			    (netmp = copymsg(ihd->ihd_mp)) == NULL) {
				atomic_inc_64(&ips->ips_drops);
				continue;
			}
		}

		if (ipnet->ipnet_flags & IPNET_INFO) {
			/*
			 * NOTE(review): netmp is overwritten with NULL on
			 * failure, so ipnet_addheader() presumably frees the
			 * message itself in that case -- confirm.
			 */
			if ((netmp = ipnet_addheader(ihd, netmp)) == NULL) {
				atomic_inc_64(&ips->ips_drops);
				continue;
			}
		}

		/*
		 * Pass the message straight upstream when nothing is queued
		 * on our read queue and the next queue can take it; otherwise
		 * queue it on our read queue if there is room, else drop it.
		 */
		if (ipnet->ipnet_rq->q_first == NULL &&
		    canputnext(ipnet->ipnet_rq)) {
			putnext(ipnet->ipnet_rq, netmp);
		} else if (canput(ipnet->ipnet_rq)) {
			(void) putq(ipnet->ipnet_rq, netmp);
		} else {
			freemsg(netmp);
			atomic_inc_64(&ips->ips_drops);
		}
	}

	ipnet_walkers_dec(ips);

	freemsg(ihd->ihd_mp);
	freemsg(mp);
}

/*
 * Queue mp (an mblk carrying an ipobs_hook_data_t) for ipnet_dispatch() on
 * ipnet_taskq without sleeping.  If the dispatch fails, the drop is counted
 * against the packet's stack and both the packet and mp are freed here.
 */
static void
ipnet_input(mblk_t *mp)
{
	ipobs_hook_data_t	*ihd = (ipobs_hook_data_t *)mp->b_rptr;

	if (ddi_taskq_dispatch(ipnet_taskq, ipnet_dispatch, mp, DDI_NOSLEEP) !=
	    DDI_SUCCESS) {
		atomic_inc_64(&ihd->ihd_stack->netstack_ipnet->ips_drops);
		freemsg(ihd->ihd_mp);
		freemsg(mp);
	}
}

/*
 * Create a new ipnetif_t and new minor node for it.  If creation is
 * successful the new ipnetif_t is inserted into an avl_tree
 * containing ipnetif's for this stack instance.
 *
 * Returns the new ipnetif_t, or NULL if no minor number or memory was
 * available.  The VERIFYs require that neither the index nor the name is
 * already present in the trees.
 */
static ipnetif_t *
ipnet_create_if(const char *name, uint64_t index, ipnet_stack_t *ips)
{
	ipnetif_t	*ipnetif;
	avl_index_t	where = 0;
	minor_t		ifminor;

	/*
	 * Because ipnet_create_if() can be called from a NIC event
	 * callback, it should not block.
	 */
	ifminor = (minor_t)id_alloc_nosleep(ipnet_minor_space);
	if (ifminor == (minor_t)-1)
		return (NULL);
	if ((ipnetif = kmem_zalloc(sizeof (*ipnetif), KM_NOSLEEP)) == NULL) {
		id_free(ipnet_minor_space, ifminor);
		return (NULL);
	}

	(void) strlcpy(ipnetif->if_name, name, LIFNAMSIZ);
	ipnetif->if_index = index;

	mutex_init(&ipnetif->if_addr_lock, NULL, MUTEX_DEFAULT, 0);
	list_create(&ipnetif->if_ip4addr_list, sizeof (ipnetif_addr_t),
	    offsetof(ipnetif_addr_t, ifa_link));
	list_create(&ipnetif->if_ip6addr_list, sizeof (ipnetif_addr_t),
	    offsetof(ipnetif_addr_t, ifa_link));
	ipnetif->if_dev = makedevice(ipnet_major, ifminor);
	mutex_init(&ipnetif->if_reflock, NULL, MUTEX_DEFAULT, 0);
	/* Initial reference; it is released by ipnet_remove_if(). */
	ipnetif->if_refcnt = 1;

	/* The same node is linked into both the by-index and by-name trees. */
	mutex_enter(&ips->ips_avl_lock);
	VERIFY(avl_find(&ips->ips_avl_by_index, &index, &where) == NULL);
	avl_insert(&ips->ips_avl_by_index, ipnetif, where);
	VERIFY(avl_find(&ips->ips_avl_by_name, (void *)name, &where) == NULL);
	avl_insert(&ips->ips_avl_by_name, ipnetif, where);
	mutex_exit(&ips->ips_avl_lock);

	return (ipnetif);
}

/*
 * Take ipnetif out of service: hang up every open stream bound to it, remove
 * it from both AVL trees and drop the reference taken in ipnet_create_if().
 */
static void
ipnet_remove_if(ipnetif_t *ipnetif, ipnet_stack_t *ips)
{
	ipnet_t	*ipnet;

	ipnet_walkers_inc(ips);
	/* Send an M_HANGUP to all open streams associated with this ipnetif. */
	for (ipnet = list_head(&ips->ips_str_list); ipnet != NULL;
	    ipnet = list_next(&ips->ips_str_list, ipnet)) {
		if (ipnet->ipnet_if == ipnetif)
			(void) putnextctl(ipnet->ipnet_rq, M_HANGUP);
	}
	ipnet_walkers_dec(ips);
	mutex_enter(&ips->ips_avl_lock);
	avl_remove(&ips->ips_avl_by_index, ipnetif);
	avl_remove(&ips->ips_avl_by_name, ipnetif);
	mutex_exit(&ips->ips_avl_lock);
	/* Release the reference we implicitly held in ipnet_create_if(). */
	ipnetif_refrele(ipnetif);
}

/*
 * Remove and free every ipnetif_addr_t on addrlist.  The list itself is left
 * initialised (and empty); no locks are taken here.
 */
static void
ipnet_purge_addrlist(list_t *addrlist)
{
	ipnetif_addr_t	*ifa;

	while ((ifa = list_head(addrlist)) != NULL) {
		list_remove(addrlist, ifa);
		kmem_free(ifa, sizeof (*ifa));
	}
}

/*
 * Destroy an ipnetif_t whose reference count has dropped to zero: free its
 * address lists, destroy its locks, return its minor number to
 * ipnet_minor_space and free the structure.
 */
static void
ipnet_free_if(ipnetif_t *ipnetif)
{
	ASSERT(ipnetif->if_refcnt == 0);

	/* Remove IPv4/v6 address lists from the ipnetif */
	ipnet_purge_addrlist(&ipnetif->if_ip4addr_list);
	list_destroy(&ipnetif->if_ip4addr_list);
	ipnet_purge_addrlist(&ipnetif->if_ip6addr_list);
	list_destroy(&ipnetif->if_ip6addr_list);
	mutex_destroy(&ipnetif->if_addr_lock);
	mutex_destroy(&ipnetif->if_reflock);
	id_free(ipnet_minor_space, getminor(ipnetif->if_dev));
	kmem_free(ipnetif, sizeof (*ipnetif));
}

/*
 * Create an ipnetif_addr_t with the given logical interface id (lif)
 * and add it to the supplied ipnetif.  The lif is the netinfo
 * representation of logical interface id, and we use this id to match
 * incoming netinfo events against our lists of addresses.
1220 */ 1221 static void 1222 ipnet_add_ifaddr(uint64_t lif, ipnetif_t *ipnetif, net_handle_t nd) 1223 { 1224 ipnetif_addr_t *ifaddr; 1225 zoneid_t zoneid; 1226 struct sockaddr_in bcast; 1227 struct sockaddr_storage addr; 1228 net_ifaddr_t type = NA_ADDRESS; 1229 uint64_t phyif = ipnetif->if_index; 1230 1231 if (net_getlifaddr(nd, phyif, lif, 1, &type, &addr) != 0 || 1232 net_getlifzone(nd, phyif, lif, &zoneid) != 0) 1233 return; 1234 if ((ifaddr = kmem_alloc(sizeof (*ifaddr), KM_NOSLEEP)) == NULL) 1235 return; 1236 1237 ifaddr->ifa_zone = zoneid; 1238 ifaddr->ifa_id = lif; 1239 1240 switch (addr.ss_family) { 1241 case AF_INET: 1242 ifaddr->ifa_ip4addr = 1243 ((struct sockaddr_in *)&addr)->sin_addr.s_addr; 1244 /* 1245 * Try and get the broadcast address. Note that it's okay for 1246 * an interface to not have a broadcast address, so we don't 1247 * fail the entire operation if net_getlifaddr() fails here. 1248 */ 1249 type = NA_BROADCAST; 1250 if (net_getlifaddr(nd, phyif, lif, 1, &type, &bcast) == 0) 1251 ifaddr->ifa_brdaddr = bcast.sin_addr.s_addr; 1252 break; 1253 case AF_INET6: 1254 ifaddr->ifa_ip6addr = ((struct sockaddr_in6 *)&addr)->sin6_addr; 1255 break; 1256 } 1257 1258 mutex_enter(&ipnetif->if_addr_lock); 1259 list_insert_tail(addr.ss_family == AF_INET ? 1260 &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list, ifaddr); 1261 mutex_exit(&ipnetif->if_addr_lock); 1262 } 1263 1264 static void 1265 ipnet_delete_ifaddr(ipnetif_addr_t *ifaddr, ipnetif_t *ipnetif, boolean_t isv6) 1266 { 1267 mutex_enter(&ipnetif->if_addr_lock); 1268 list_remove(isv6 ? 
1269 &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list, ifaddr); 1270 mutex_exit(&ipnetif->if_addr_lock); 1271 kmem_free(ifaddr, sizeof (*ifaddr)); 1272 } 1273 1274 static void 1275 ipnet_plumb_ev(uint64_t ifindex, const char *ifname, ipnet_stack_t *ips, 1276 boolean_t isv6) 1277 { 1278 ipnetif_t *ipnetif; 1279 boolean_t refrele_needed = B_TRUE; 1280 1281 if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL) { 1282 ipnetif = ipnet_create_if(ifname, ifindex, ips); 1283 refrele_needed = B_FALSE; 1284 } 1285 if (ipnetif != NULL) { 1286 ipnetif->if_flags |= 1287 isv6 ? IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED; 1288 } 1289 1290 if (ipnetif->if_multicnt != 0) { 1291 if (ip_join_allmulti(ifindex, isv6, 1292 ips->ips_netstack->netstack_ip) == 0) { 1293 ipnetif->if_flags |= 1294 isv6 ? IPNETIF_IPV6ALLMULTI : IPNETIF_IPV4ALLMULTI; 1295 } 1296 } 1297 1298 if (refrele_needed) 1299 ipnetif_refrele(ipnetif); 1300 } 1301 1302 static void 1303 ipnet_unplumb_ev(uint64_t ifindex, ipnet_stack_t *ips, boolean_t isv6) 1304 { 1305 ipnetif_t *ipnetif; 1306 1307 if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL) 1308 return; 1309 1310 mutex_enter(&ipnetif->if_addr_lock); 1311 ipnet_purge_addrlist(isv6 ? 1312 &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list); 1313 mutex_exit(&ipnetif->if_addr_lock); 1314 1315 /* 1316 * Note that we have one ipnetif for both IPv4 and IPv6, but we receive 1317 * separate NE_UNPLUMB events for IPv4 and IPv6. We remove the ipnetif 1318 * if both IPv4 and IPv6 interfaces have been unplumbed. 1319 */ 1320 ipnetif->if_flags &= isv6 ? 
~IPNETIF_IPV6PLUMBED : ~IPNETIF_IPV4PLUMBED; 1321 if (!(ipnetif->if_flags & (IPNETIF_IPV4PLUMBED | IPNETIF_IPV6PLUMBED))) 1322 ipnet_remove_if(ipnetif, ips); 1323 ipnetif_refrele(ipnetif); 1324 } 1325 1326 static void 1327 ipnet_lifup_ev(uint64_t ifindex, uint64_t lifindex, net_handle_t nd, 1328 ipnet_stack_t *ips, boolean_t isv6) 1329 { 1330 ipnetif_t *ipnetif; 1331 ipnetif_addr_t *ifaddr; 1332 1333 if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL) 1334 return; 1335 if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL) { 1336 /* 1337 * We must have missed a NE_LIF_DOWN event. Delete this 1338 * ifaddr and re-create it. 1339 */ 1340 ipnet_delete_ifaddr(ifaddr, ipnetif, isv6); 1341 } 1342 1343 ipnet_add_ifaddr(lifindex, ipnetif, nd); 1344 ipnetif_refrele(ipnetif); 1345 } 1346 1347 static void 1348 ipnet_lifdown_ev(uint64_t ifindex, uint64_t lifindex, ipnet_stack_t *ips, 1349 boolean_t isv6) 1350 { 1351 ipnetif_t *ipnetif; 1352 ipnetif_addr_t *ifaddr; 1353 1354 if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL) 1355 return; 1356 if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL) 1357 ipnet_delete_ifaddr(ifaddr, ipnetif, isv6); 1358 ipnetif_refrele(ipnetif); 1359 /* 1360 * Make sure that open streams on this ipnetif are still allowed to 1361 * have it open. 1362 */ 1363 ipnet_if_zonecheck(ipnetif, ips); 1364 } 1365 1366 /* 1367 * This callback from the NIC event framework dispatches a taskq as the event 1368 * handlers may block. 
1369 */ 1370 /* ARGSUSED */ 1371 static int 1372 ipnet_nicevent_cb(hook_event_token_t token, hook_data_t info, void *arg) 1373 { 1374 ipnet_stack_t *ips = arg; 1375 hook_nic_event_t *hn = (hook_nic_event_t *)info; 1376 ipnet_nicevent_t *ipne; 1377 1378 if ((ipne = kmem_alloc(sizeof (ipnet_nicevent_t), KM_NOSLEEP)) == NULL) 1379 return (0); 1380 ipne->ipne_event = hn->hne_event; 1381 ipne->ipne_protocol = hn->hne_protocol; 1382 ipne->ipne_stackid = ips->ips_netstack->netstack_stackid; 1383 ipne->ipne_ifindex = hn->hne_nic; 1384 ipne->ipne_lifindex = hn->hne_lif; 1385 if (hn->hne_datalen != 0) { 1386 (void) strlcpy(ipne->ipne_ifname, hn->hne_data, 1387 sizeof (ipne->ipne_ifname)); 1388 } 1389 (void) ddi_taskq_dispatch(ipnet_nicevent_taskq, ipnet_nicevent_task, 1390 ipne, DDI_NOSLEEP); 1391 return (0); 1392 } 1393 1394 static void 1395 ipnet_nicevent_task(void *arg) 1396 { 1397 ipnet_nicevent_t *ipne = arg; 1398 netstack_t *ns; 1399 ipnet_stack_t *ips; 1400 boolean_t isv6; 1401 1402 if ((ns = netstack_find_by_stackid(ipne->ipne_stackid)) == NULL) 1403 goto done; 1404 ips = ns->netstack_ipnet; 1405 isv6 = (ipne->ipne_protocol == ips->ips_ndv6); 1406 1407 mutex_enter(&ips->ips_event_lock); 1408 switch (ipne->ipne_event) { 1409 case NE_PLUMB: 1410 ipnet_plumb_ev(ipne->ipne_ifindex, ipne->ipne_ifname, ips, 1411 isv6); 1412 break; 1413 case NE_UNPLUMB: 1414 ipnet_unplumb_ev(ipne->ipne_ifindex, ips, isv6); 1415 break; 1416 case NE_LIF_UP: 1417 ipnet_lifup_ev(ipne->ipne_ifindex, ipne->ipne_lifindex, 1418 ipne->ipne_protocol, ips, isv6); 1419 break; 1420 case NE_LIF_DOWN: 1421 ipnet_lifdown_ev(ipne->ipne_ifindex, ipne->ipne_lifindex, ips, 1422 isv6); 1423 break; 1424 default: 1425 break; 1426 } 1427 mutex_exit(&ips->ips_event_lock); 1428 done: 1429 if (ns != NULL) 1430 netstack_rele(ns); 1431 kmem_free(ipne, sizeof (ipnet_nicevent_t)); 1432 } 1433 1434 dev_t 1435 ipnet_if_getdev(char *name, zoneid_t zoneid) 1436 { 1437 netstack_t *ns; 1438 ipnet_stack_t *ips; 1439 ipnetif_t 
*ipnetif; 1440 dev_t dev = (dev_t)-1; 1441 1442 if (is_system_labeled() && zoneid != GLOBAL_ZONEID) 1443 return (dev); 1444 if ((ns = netstack_find_by_zoneid(zoneid)) == NULL) 1445 return (dev); 1446 1447 ips = ns->netstack_ipnet; 1448 mutex_enter(&ips->ips_avl_lock); 1449 if ((ipnetif = avl_find(&ips->ips_avl_by_name, name, NULL)) != NULL) { 1450 if (ipnet_if_in_zone(ipnetif, zoneid, ips)) 1451 dev = ipnetif->if_dev; 1452 } 1453 mutex_exit(&ips->ips_avl_lock); 1454 netstack_rele(ns); 1455 1456 return (dev); 1457 } 1458 1459 static ipnetif_t * 1460 ipnet_if_getby_index(uint64_t id, ipnet_stack_t *ips) 1461 { 1462 ipnetif_t *ipnetif; 1463 1464 mutex_enter(&ips->ips_avl_lock); 1465 if ((ipnetif = avl_find(&ips->ips_avl_by_index, &id, NULL)) != NULL) 1466 ipnetif_refhold(ipnetif); 1467 mutex_exit(&ips->ips_avl_lock); 1468 return (ipnetif); 1469 } 1470 1471 static ipnetif_t * 1472 ipnet_if_getby_dev(dev_t dev, ipnet_stack_t *ips) 1473 { 1474 ipnetif_t *ipnetif; 1475 avl_tree_t *tree; 1476 1477 mutex_enter(&ips->ips_avl_lock); 1478 tree = &ips->ips_avl_by_index; 1479 for (ipnetif = avl_first(tree); ipnetif != NULL; 1480 ipnetif = avl_walk(tree, ipnetif, AVL_AFTER)) { 1481 if (ipnetif->if_dev == dev) { 1482 ipnetif_refhold(ipnetif); 1483 break; 1484 } 1485 } 1486 mutex_exit(&ips->ips_avl_lock); 1487 return (ipnetif); 1488 } 1489 1490 static ipnetif_addr_t * 1491 ipnet_match_lif(ipnetif_t *ipnetif, lif_if_t lid, boolean_t isv6) 1492 { 1493 ipnetif_addr_t *ifaddr; 1494 list_t *list; 1495 1496 mutex_enter(&ipnetif->if_addr_lock); 1497 list = isv6 ? 
&ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list; 1498 for (ifaddr = list_head(list); ifaddr != NULL; 1499 ifaddr = list_next(list, ifaddr)) { 1500 if (lid == ifaddr->ifa_id) 1501 break; 1502 } 1503 mutex_exit(&ipnetif->if_addr_lock); 1504 return (ifaddr); 1505 } 1506 1507 /* ARGSUSED */ 1508 static void * 1509 ipnet_stack_init(netstackid_t stackid, netstack_t *ns) 1510 { 1511 ipnet_stack_t *ips; 1512 1513 ips = kmem_zalloc(sizeof (*ips), KM_SLEEP); 1514 ips->ips_netstack = ns; 1515 mutex_init(&ips->ips_avl_lock, NULL, MUTEX_DEFAULT, 0); 1516 avl_create(&ips->ips_avl_by_index, ipnet_if_compare_index, 1517 sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_index)); 1518 avl_create(&ips->ips_avl_by_name, ipnet_if_compare_name, 1519 sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_name)); 1520 mutex_init(&ips->ips_walkers_lock, NULL, MUTEX_DEFAULT, NULL); 1521 cv_init(&ips->ips_walkers_cv, NULL, CV_DRIVER, NULL); 1522 list_create(&ips->ips_str_list, sizeof (ipnet_t), 1523 offsetof(ipnet_t, ipnet_next)); 1524 ipnet_register_netihook(ips); 1525 return (ips); 1526 } 1527 1528 /* ARGSUSED */ 1529 static void 1530 ipnet_stack_fini(netstackid_t stackid, void *arg) 1531 { 1532 ipnet_stack_t *ips = arg; 1533 ipnetif_t *ipnetif, *nipnetif; 1534 1535 if (ips->ips_ndv4 != NULL) { 1536 VERIFY(net_hook_unregister(ips->ips_ndv4, NH_NIC_EVENTS, 1537 ips->ips_nicevents) == 0); 1538 VERIFY(net_protocol_release(ips->ips_ndv4) == 0); 1539 } 1540 if (ips->ips_ndv6 != NULL) { 1541 VERIFY(net_hook_unregister(ips->ips_ndv6, NH_NIC_EVENTS, 1542 ips->ips_nicevents) == 0); 1543 VERIFY(net_protocol_release(ips->ips_ndv6) == 0); 1544 } 1545 hook_free(ips->ips_nicevents); 1546 1547 for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL; 1548 ipnetif = nipnetif) { 1549 nipnetif = AVL_NEXT(&ips->ips_avl_by_index, ipnetif); 1550 ipnet_remove_if(ipnetif, ips); 1551 } 1552 avl_destroy(&ips->ips_avl_by_index); 1553 avl_destroy(&ips->ips_avl_by_name); 1554 
mutex_destroy(&ips->ips_avl_lock); 1555 mutex_destroy(&ips->ips_walkers_lock); 1556 cv_destroy(&ips->ips_walkers_cv); 1557 list_destroy(&ips->ips_str_list); 1558 kmem_free(ips, sizeof (*ips)); 1559 } 1560 1561 /* Do any of the addresses in addrlist belong the supplied zoneid? */ 1562 static boolean_t 1563 ipnet_addrs_in_zone(list_t *addrlist, zoneid_t zoneid) 1564 { 1565 ipnetif_addr_t *ifa; 1566 1567 for (ifa = list_head(addrlist); ifa != NULL; 1568 ifa = list_next(addrlist, ifa)) { 1569 if (ifa->ifa_zone == zoneid) 1570 return (B_TRUE); 1571 } 1572 return (B_FALSE); 1573 } 1574 1575 /* Should the supplied ipnetif be visible from the supplied zoneid? */ 1576 static boolean_t 1577 ipnet_if_in_zone(ipnetif_t *ipnetif, zoneid_t zoneid, ipnet_stack_t *ips) 1578 { 1579 int ret; 1580 1581 /* 1582 * The global zone has visibility into all interfaces in the global 1583 * stack, and exclusive stack zones have visibility into all 1584 * interfaces in their stack. 1585 */ 1586 if (zoneid == GLOBAL_ZONEID || 1587 ips->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID) 1588 return (B_TRUE); 1589 1590 /* 1591 * Shared-stack zones only have visibility for interfaces that have 1592 * addresses in their zone. 1593 */ 1594 mutex_enter(&ipnetif->if_addr_lock); 1595 ret = ipnet_addrs_in_zone(&ipnetif->if_ip4addr_list, zoneid) || 1596 ipnet_addrs_in_zone(&ipnetif->if_ip6addr_list, zoneid); 1597 mutex_exit(&ipnetif->if_addr_lock); 1598 return (ret); 1599 } 1600 1601 /* 1602 * Verify that any ipnet_t that has a reference to the supplied ipnetif should 1603 * still be allowed to have it open. A given ipnet_t may no longer be allowed 1604 * to have an ipnetif open if there are no longer any addresses that belong to 1605 * the ipnetif in the ipnet_t's non-global shared-stack zoneid. If that's the 1606 * case, send the ipnet_t an M_HANGUP. 
1607 */ 1608 static void 1609 ipnet_if_zonecheck(ipnetif_t *ipnetif, ipnet_stack_t *ips) 1610 { 1611 list_t *strlist = &ips->ips_str_list; 1612 ipnet_t *ipnet; 1613 1614 ipnet_walkers_inc(ips); 1615 for (ipnet = list_head(strlist); ipnet != NULL; 1616 ipnet = list_next(strlist, ipnet)) { 1617 if (ipnet->ipnet_if != ipnetif) 1618 continue; 1619 if (!ipnet_if_in_zone(ipnetif, ipnet->ipnet_zoneid, ips)) 1620 (void) putnextctl(ipnet->ipnet_rq, M_HANGUP); 1621 } 1622 ipnet_walkers_dec(ips); 1623 } 1624 1625 void 1626 ipnet_walk_if(ipnet_walkfunc_t *cb, void *arg, zoneid_t zoneid) 1627 { 1628 ipnetif_t *ipnetif; 1629 list_t cbdata; 1630 ipnetif_cbdata_t *cbnode; 1631 netstack_t *ns; 1632 ipnet_stack_t *ips; 1633 1634 /* 1635 * On labeled systems, non-global zones shouldn't see anything 1636 * in /dev/ipnet. 1637 */ 1638 if (is_system_labeled() && zoneid != GLOBAL_ZONEID) 1639 return; 1640 1641 if ((ns = netstack_find_by_zoneid(zoneid)) == NULL) 1642 return; 1643 1644 ips = ns->netstack_ipnet; 1645 list_create(&cbdata, sizeof (ipnetif_cbdata_t), 1646 offsetof(ipnetif_cbdata_t, ic_next)); 1647 1648 mutex_enter(&ips->ips_avl_lock); 1649 for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL; 1650 ipnetif = avl_walk(&ips->ips_avl_by_index, ipnetif, AVL_AFTER)) { 1651 if (!ipnet_if_in_zone(ipnetif, zoneid, ips)) 1652 continue; 1653 cbnode = kmem_zalloc(sizeof (ipnetif_cbdata_t), KM_SLEEP); 1654 (void) strlcpy(cbnode->ic_ifname, ipnetif->if_name, LIFNAMSIZ); 1655 cbnode->ic_dev = ipnetif->if_dev; 1656 list_insert_head(&cbdata, cbnode); 1657 } 1658 mutex_exit(&ips->ips_avl_lock); 1659 1660 while ((cbnode = list_head(&cbdata)) != NULL) { 1661 cb(cbnode->ic_ifname, arg, cbnode->ic_dev); 1662 list_remove(&cbdata, cbnode); 1663 kmem_free(cbnode, sizeof (ipnetif_cbdata_t)); 1664 } 1665 list_destroy(&cbdata); 1666 netstack_rele(ns); 1667 } 1668 1669 static int 1670 ipnet_if_compare_index(const void *index_ptr, const void *ipnetifp) 1671 { 1672 int64_t index1 = *((int64_t 
*)index_ptr); 1673 int64_t index2 = (int64_t)((ipnetif_t *)ipnetifp)->if_index; 1674 1675 return (SIGNOF(index2 - index1)); 1676 } 1677 1678 static int 1679 ipnet_if_compare_name(const void *name_ptr, const void *ipnetifp) 1680 { 1681 int res; 1682 1683 res = strcmp(((ipnetif_t *)ipnetifp)->if_name, name_ptr); 1684 return (SIGNOF(res)); 1685 } 1686 1687 static void 1688 ipnetif_refhold(ipnetif_t *ipnetif) 1689 { 1690 mutex_enter(&ipnetif->if_reflock); 1691 ipnetif->if_refcnt++; 1692 mutex_exit(&ipnetif->if_reflock); 1693 } 1694 1695 static void 1696 ipnetif_refrele(ipnetif_t *ipnetif) 1697 { 1698 mutex_enter(&ipnetif->if_reflock); 1699 ASSERT(ipnetif->if_refcnt != 0); 1700 if (--ipnetif->if_refcnt == 0) 1701 ipnet_free_if(ipnetif); 1702 else 1703 mutex_exit(&ipnetif->if_reflock); 1704 } 1705 1706 static void 1707 ipnet_walkers_inc(ipnet_stack_t *ips) 1708 { 1709 mutex_enter(&ips->ips_walkers_lock); 1710 ips->ips_walkers_cnt++; 1711 mutex_exit(&ips->ips_walkers_lock); 1712 } 1713 1714 static void 1715 ipnet_walkers_dec(ipnet_stack_t *ips) 1716 { 1717 mutex_enter(&ips->ips_walkers_lock); 1718 ASSERT(ips->ips_walkers_cnt != 0); 1719 if (--ips->ips_walkers_cnt == 0) 1720 cv_broadcast(&ips->ips_walkers_cv); 1721 mutex_exit(&ips->ips_walkers_lock); 1722 } 1723