/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2008 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * The ipnet device defined here provides access to packets at the IP layer. To * provide access to packets at this layer it registers a callback function in * the ip module and when there are open instances of the device ip will pass * packets into the device. Packets from ip are passed on the input, output and * loopback paths. Internally the module returns to ip as soon as possible by * deferring processing using a taskq. * * Management of the devices in /dev/ipnet/ is handled by the devname * filesystem and use of the neti interfaces. This module registers for NIC * events using the neti framework so that when IP interfaces are brought up, * taken down etc. the ipnet module is notified and its view of the interfaces * configured on the system adjusted. On attach, the module gets an initial * view of the system again using the neti framework but as it has already * registered for IP interface events, it is still up-to-date with any changes. 
*/ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include static struct module_info ipnet_minfo = { 1, /* mi_idnum */ "ipnet", /* mi_idname */ 0, /* mi_minpsz */ INFPSZ, /* mi_maxpsz */ 2048, /* mi_hiwat */ 0 /* mi_lowat */ }; /* * List to hold static view of ipnetif_t's on the system. This is needed to * avoid holding the lock protecting the avl tree of ipnetif's over the * callback into the dev filesystem. */ typedef struct ipnetif_cbdata { char ic_ifname[LIFNAMSIZ]; dev_t ic_dev; list_node_t ic_next; } ipnetif_cbdata_t; /* * Convenience enumerated type for ipnet_accept(). It describes the * properties of a given ipnet_addrp_t relative to a single ipnet_t * client stream. The values represent whether the address is ... */ typedef enum { IPNETADDR_MYADDR, /* an address on my ipnetif_t. */ IPNETADDR_MBCAST, /* a multicast or broadcast address. */ IPNETADDR_UNKNOWN /* none of the above. */ } ipnet_addrtype_t; /* Argument used for the ipnet_nicevent_taskq callback. 
*/ typedef struct ipnet_nicevent_s { nic_event_t ipne_event; net_handle_t ipne_protocol; netstackid_t ipne_stackid; uint64_t ipne_ifindex; uint64_t ipne_lifindex; char ipne_ifname[LIFNAMSIZ]; } ipnet_nicevent_t; static dev_info_t *ipnet_dip; static major_t ipnet_major; static ddi_taskq_t *ipnet_taskq; /* taskq for packets */ static ddi_taskq_t *ipnet_nicevent_taskq; /* taskq for NIC events */ static id_space_t *ipnet_minor_space; static const int IPNET_MINOR_LO = 1; /* minor number for /dev/lo0 */ static const int IPNET_MINOR_MIN = 2; /* start of dynamic minors */ static dl_info_ack_t ipnet_infoack = IPNET_INFO_ACK_INIT; static ipnet_acceptfn_t ipnet_accept, ipnet_loaccept; static void ipnet_input(mblk_t *); static int ipnet_wput(queue_t *, mblk_t *); static int ipnet_rsrv(queue_t *); static int ipnet_open(queue_t *, dev_t *, int, int, cred_t *); static int ipnet_close(queue_t *); static void ipnet_ioctl(queue_t *, mblk_t *); static void ipnet_iocdata(queue_t *, mblk_t *); static void ipnet_wputnondata(queue_t *, mblk_t *); static int ipnet_attach(dev_info_t *, ddi_attach_cmd_t); static int ipnet_detach(dev_info_t *, ddi_detach_cmd_t); static int ipnet_devinfo(dev_info_t *, ddi_info_cmd_t, void *, void **); static void ipnet_inforeq(queue_t *q, mblk_t *mp); static void ipnet_bindreq(queue_t *q, mblk_t *mp); static void ipnet_unbindreq(queue_t *q, mblk_t *mp); static void ipnet_dlpromisconreq(queue_t *q, mblk_t *mp); static void ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp); static int ipnet_join_allmulti(ipnetif_t *, ipnet_stack_t *); static void ipnet_leave_allmulti(ipnetif_t *, ipnet_stack_t *); static int ipnet_nicevent_cb(hook_event_token_t, hook_data_t, void *); static void ipnet_nicevent_task(void *); static ipnetif_t *ipnet_create_if(const char *, uint64_t, ipnet_stack_t *); static void ipnet_remove_if(ipnetif_t *, ipnet_stack_t *); static ipnetif_addr_t *ipnet_match_lif(ipnetif_t *, lif_if_t, boolean_t); static ipnetif_t *ipnet_if_getby_index(uint64_t, 
ipnet_stack_t *); static ipnetif_t *ipnet_if_getby_dev(dev_t, ipnet_stack_t *); static boolean_t ipnet_if_in_zone(ipnetif_t *, zoneid_t, ipnet_stack_t *); static void ipnet_if_zonecheck(ipnetif_t *, ipnet_stack_t *); static int ipnet_populate_if(net_handle_t, ipnet_stack_t *, boolean_t); static int ipnet_if_compare_name(const void *, const void *); static int ipnet_if_compare_index(const void *, const void *); static void ipnet_add_ifaddr(uint64_t, ipnetif_t *, net_handle_t); static void ipnet_delete_ifaddr(ipnetif_addr_t *, ipnetif_t *, boolean_t); static void ipnetif_refhold(ipnetif_t *); static void ipnetif_refrele(ipnetif_t *); static void ipnet_walkers_inc(ipnet_stack_t *); static void ipnet_walkers_dec(ipnet_stack_t *); static void ipnet_register_netihook(ipnet_stack_t *); static void *ipnet_stack_init(netstackid_t, netstack_t *); static void ipnet_stack_fini(netstackid_t, void *); static struct qinit ipnet_rinit = { NULL, /* qi_putp */ ipnet_rsrv, /* qi_srvp */ ipnet_open, /* qi_qopen */ ipnet_close, /* qi_qclose */ NULL, /* qi_qadmin */ &ipnet_minfo, /* qi_minfo */ }; static struct qinit ipnet_winit = { ipnet_wput, /* qi_putp */ NULL, /* qi_srvp */ NULL, /* qi_qopen */ NULL, /* qi_qclose */ NULL, /* qi_qadmin */ &ipnet_minfo, /* qi_minfo */ }; static struct streamtab ipnet_info = { &ipnet_rinit, &ipnet_winit }; DDI_DEFINE_STREAM_OPS(ipnet_ops, nulldev, nulldev, ipnet_attach, ipnet_detach, nodev, ipnet_devinfo, D_MP | D_MTPERMOD, &ipnet_info, ddi_quiesce_not_supported); static struct modldrv modldrv = { &mod_driverops, "STREAMS ipnet driver", &ipnet_ops }; static struct modlinkage modlinkage = { MODREV_1, &modldrv, NULL }; /* * Walk the list of physical interfaces on the machine, for each * interface create a new ipnetif_t and add any addresses to it. We * need to do the walk twice, once for IPv4 and once for IPv6. * * The interfaces are destroyed as part of ipnet_stack_fini() for each * stack. 
Note that we cannot do this initialization in * ipnet_stack_init(), since ipnet_stack_init() cannot fail. */ static int ipnet_if_init(void) { netstack_handle_t nh; netstack_t *ns; ipnet_stack_t *ips; int ret = 0; netstack_next_init(&nh); while ((ns = netstack_next(&nh)) != NULL) { ips = ns->netstack_ipnet; if ((ret = ipnet_populate_if(ips->ips_ndv4, ips, B_FALSE)) == 0) ret = ipnet_populate_if(ips->ips_ndv6, ips, B_TRUE); netstack_rele(ns); if (ret != 0) break; } netstack_next_fini(&nh); return (ret); } /* * Standard module entry points. */ int _init(void) { int ret; if ((ipnet_major = ddi_name_to_major("ipnet")) == (major_t)-1) return (ENODEV); ipnet_minor_space = id_space_create("ipnet_minor_space", IPNET_MINOR_MIN, MAXMIN32); netstack_register(NS_IPNET, ipnet_stack_init, NULL, ipnet_stack_fini); /* * We call ddi_taskq_create() with nthread == 1 to ensure in-order * delivery of packets to clients. */ ipnet_taskq = ddi_taskq_create(NULL, "ipnet", 1, TASKQ_DEFAULTPRI, 0); ipnet_nicevent_taskq = ddi_taskq_create(NULL, "ipnet_nic_event_queue", 1, TASKQ_DEFAULTPRI, 0); if (ipnet_taskq == NULL || ipnet_nicevent_taskq == NULL) { ret = ENOMEM; goto done; } if ((ret = ipnet_if_init()) == 0) ret = mod_install(&modlinkage); done: if (ret != 0) { if (ipnet_taskq != NULL) ddi_taskq_destroy(ipnet_taskq); if (ipnet_nicevent_taskq != NULL) ddi_taskq_destroy(ipnet_nicevent_taskq); netstack_unregister(NS_IPNET); id_space_destroy(ipnet_minor_space); } return (ret); } int _fini(void) { int err; if ((err = mod_remove(&modlinkage)) != 0) return (err); ddi_taskq_destroy(ipnet_nicevent_taskq); ddi_taskq_destroy(ipnet_taskq); netstack_unregister(NS_IPNET); id_space_destroy(ipnet_minor_space); return (0); } int _info(struct modinfo *modinfop) { return (mod_info(&modlinkage, modinfop)); } static void ipnet_register_netihook(ipnet_stack_t *ips) { int ret; zoneid_t zoneid; netid_t netid; HOOK_INIT(ips->ips_nicevents, ipnet_nicevent_cb, "ipnet_nicevents", ips); /* * It is possible for an 
exclusive stack to be in the process of * shutting down here, and the netid and protocol lookups could fail * in that case. */ zoneid = netstackid_to_zoneid(ips->ips_netstack->netstack_stackid); if ((netid = net_zoneidtonetid(zoneid)) == -1) return; if ((ips->ips_ndv4 = net_protocol_lookup(netid, NHF_INET)) != NULL) { if ((ret = net_hook_register(ips->ips_ndv4, NH_NIC_EVENTS, ips->ips_nicevents)) != 0) { VERIFY(net_protocol_release(ips->ips_ndv4) == 0); ips->ips_ndv4 = NULL; cmn_err(CE_WARN, "unable to register IPv4 netinfo hooks" " in zone %d: %d", zoneid, ret); } } if ((ips->ips_ndv6 = net_protocol_lookup(netid, NHF_INET6)) != NULL) { if ((ret = net_hook_register(ips->ips_ndv6, NH_NIC_EVENTS, ips->ips_nicevents)) != 0) { VERIFY(net_protocol_release(ips->ips_ndv6) == 0); ips->ips_ndv6 = NULL; cmn_err(CE_WARN, "unable to register IPv6 netinfo hooks" " in zone %d: %d", zoneid, ret); } } } /* * This function is called on attach to build an initial view of the * interfaces on the system. It will be called once for IPv4 and once * for IPv6, although there is only one ipnet interface for both IPv4 * and IPv6 there are separate address lists. */ static int ipnet_populate_if(net_handle_t nd, ipnet_stack_t *ips, boolean_t isv6) { phy_if_t phyif; lif_if_t lif; ipnetif_t *ipnetif; char name[LIFNAMSIZ]; boolean_t new_if = B_FALSE; uint64_t ifflags; int ret = 0; /* * If ipnet_register_netihook() was unable to initialize this * stack's net_handle_t, then we cannot populate any interface * information. This usually happens when we attempted to * grab a net_handle_t as a stack was shutting down. We don't * want to fail the entire _init() operation because of a * stack shutdown (other stacks will continue to work just * fine), so we silently return success here. */ if (nd == NULL) return (0); /* * Make sure we're not processing NIC events during the * population of our interfaces and address lists. 
*/ mutex_enter(&ips->ips_event_lock); for (phyif = net_phygetnext(nd, 0); phyif != 0; phyif = net_phygetnext(nd, phyif)) { if (net_getifname(nd, phyif, name, LIFNAMSIZ) != 0) continue; if ((ipnetif = ipnet_if_getby_index(phyif, ips)) == NULL) { ipnetif = ipnet_create_if(name, phyif, ips); if (ipnetif == NULL) { ret = ENOMEM; goto done; } new_if = B_TRUE; } ipnetif->if_flags |= isv6 ? IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED; for (lif = net_lifgetnext(nd, phyif, 0); lif != 0; lif = net_lifgetnext(nd, phyif, lif)) { /* * Skip addresses that aren't up. We'll add * them when we receive an NE_LIF_UP event. */ if (net_getlifflags(nd, phyif, lif, &ifflags) != 0 || !(ifflags & IFF_UP)) continue; /* Don't add it if we already have it. */ if (ipnet_match_lif(ipnetif, lif, isv6) != NULL) continue; ipnet_add_ifaddr(lif, ipnetif, nd); } if (!new_if) ipnetif_refrele(ipnetif); } done: mutex_exit(&ips->ips_event_lock); return (ret); } static int ipnet_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) { if (cmd != DDI_ATTACH) return (DDI_FAILURE); if (ddi_create_minor_node(dip, "lo0", S_IFCHR, IPNET_MINOR_LO, DDI_PSEUDO, 0) == DDI_FAILURE) return (DDI_FAILURE); ipnet_dip = dip; return (DDI_SUCCESS); } static int ipnet_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) { if (cmd != DDI_DETACH) return (DDI_FAILURE); ASSERT(dip == ipnet_dip); ddi_remove_minor_node(ipnet_dip, NULL); ipnet_dip = NULL; return (DDI_SUCCESS); } /* ARGSUSED */ static int ipnet_devinfo(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) { int error = DDI_FAILURE; switch (infocmd) { case DDI_INFO_DEVT2INSTANCE: *result = (void *)0; error = DDI_SUCCESS; break; case DDI_INFO_DEVT2DEVINFO: if (ipnet_dip != NULL) { *result = ipnet_dip; error = DDI_SUCCESS; } break; } return (error); } /* ARGSUSED */ static int ipnet_open(queue_t *rq, dev_t *dev, int oflag, int sflag, cred_t *crp) { ipnet_t *ipnet; netstack_t *ns = NULL; ipnet_stack_t *ips; int err = 0; zoneid_t zoneid = crgetzoneid(crp); /* * If the system 
is labeled, only the global zone is allowed to open * IP observability nodes. */ if (is_system_labeled() && zoneid != GLOBAL_ZONEID) return (EACCES); /* We don't support open as a module */ if (sflag & MODOPEN) return (ENOTSUP); /* This driver is self-cloning, we don't support re-open. */ if (rq->q_ptr != NULL) return (EBUSY); if ((ipnet = kmem_zalloc(sizeof (*ipnet), KM_NOSLEEP)) == NULL) return (ENOMEM); VERIFY((ns = netstack_find_by_cred(crp)) != NULL); ips = ns->netstack_ipnet; rq->q_ptr = WR(rq)->q_ptr = ipnet; ipnet->ipnet_rq = rq; ipnet->ipnet_minor = (minor_t)id_alloc(ipnet_minor_space); ipnet->ipnet_zoneid = zoneid; ipnet->ipnet_dlstate = DL_UNBOUND; ipnet->ipnet_sap = 0; ipnet->ipnet_ns = ns; /* * We need to hold ips_event_lock here as any NE_LIF_DOWN events need * to be processed after ipnet_if is set and the ipnet_t has been * inserted in the ips_str_list. */ mutex_enter(&ips->ips_event_lock); if (getminor(*dev) == IPNET_MINOR_LO) { ipnet->ipnet_flags |= IPNET_LOMODE; ipnet->ipnet_acceptfn = ipnet_loaccept; } else { ipnet->ipnet_acceptfn = ipnet_accept; ipnet->ipnet_if = ipnet_if_getby_dev(*dev, ips); if (ipnet->ipnet_if == NULL || !ipnet_if_in_zone(ipnet->ipnet_if, zoneid, ips)) { err = ENODEV; goto done; } } mutex_enter(&ips->ips_walkers_lock); while (ips->ips_walkers_cnt != 0) cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock); list_insert_head(&ips->ips_str_list, ipnet); *dev = makedevice(getmajor(*dev), ipnet->ipnet_minor); qprocson(rq); /* * Only register our callback if we're the first open client; we call * unregister in close() for the last open client. 
*/ if (list_head(&ips->ips_str_list) == list_tail(&ips->ips_str_list)) ipobs_register_hook(ns, ipnet_input); mutex_exit(&ips->ips_walkers_lock); done: mutex_exit(&ips->ips_event_lock); if (err != 0) { netstack_rele(ns); id_free(ipnet_minor_space, ipnet->ipnet_minor); if (ipnet->ipnet_if != NULL) ipnetif_refrele(ipnet->ipnet_if); kmem_free(ipnet, sizeof (*ipnet)); } return (err); } static int ipnet_close(queue_t *rq) { ipnet_t *ipnet = rq->q_ptr; ipnet_stack_t *ips = ipnet->ipnet_ns->netstack_ipnet; if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS) ipnet_leave_allmulti(ipnet->ipnet_if, ips); if (ipnet->ipnet_flags & IPNET_PROMISC_MULTI) ipnet_leave_allmulti(ipnet->ipnet_if, ips); mutex_enter(&ips->ips_walkers_lock); while (ips->ips_walkers_cnt != 0) cv_wait(&ips->ips_walkers_cv, &ips->ips_walkers_lock); qprocsoff(rq); list_remove(&ips->ips_str_list, ipnet); if (ipnet->ipnet_if != NULL) ipnetif_refrele(ipnet->ipnet_if); id_free(ipnet_minor_space, ipnet->ipnet_minor); kmem_free(ipnet, sizeof (*ipnet)); if (list_is_empty(&ips->ips_str_list)) ipobs_unregister_hook(ips->ips_netstack, ipnet_input); mutex_exit(&ips->ips_walkers_lock); netstack_rele(ips->ips_netstack); return (0); } static int ipnet_wput(queue_t *q, mblk_t *mp) { switch (mp->b_datap->db_type) { case M_FLUSH: if (*mp->b_rptr & FLUSHW) { flushq(q, FLUSHDATA); *mp->b_rptr &= ~FLUSHW; } if (*mp->b_rptr & FLUSHR) qreply(q, mp); else freemsg(mp); break; case M_PROTO: case M_PCPROTO: ipnet_wputnondata(q, mp); break; case M_IOCTL: ipnet_ioctl(q, mp); break; case M_IOCDATA: ipnet_iocdata(q, mp); break; default: freemsg(mp); break; } return (0); } static int ipnet_rsrv(queue_t *q) { mblk_t *mp; while ((mp = getq(q)) != NULL) { ASSERT(DB_TYPE(mp) == M_DATA); if (canputnext(q)) { putnext(q, mp); } else { (void) putbq(q, mp); break; } } return (0); } static void ipnet_ioctl(queue_t *q, mblk_t *mp) { struct iocblk *iocp = (struct iocblk *)mp->b_rptr; switch (iocp->ioc_cmd) { case DLIOCRAW: miocack(q, mp, 0, 0); break; case 
DLIOCIPNETINFO: if (iocp->ioc_count == TRANSPARENT) { mcopyin(mp, NULL, sizeof (uint_t), NULL); qreply(q, mp); break; } /* Fallthrough, we don't support I_STR with DLIOCIPNETINFO. */ default: miocnak(q, mp, 0, EINVAL); break; } } static void ipnet_iocdata(queue_t *q, mblk_t *mp) { struct iocblk *iocp = (struct iocblk *)mp->b_rptr; ipnet_t *ipnet = q->q_ptr; switch (iocp->ioc_cmd) { case DLIOCIPNETINFO: if (*(int *)mp->b_cont->b_rptr == 1) ipnet->ipnet_flags |= IPNET_INFO; else if (*(int *)mp->b_cont->b_rptr == 0) ipnet->ipnet_flags &= ~IPNET_INFO; else goto iocnak; miocack(q, mp, 0, DL_IPNETINFO_VERSION); break; default: iocnak: miocnak(q, mp, 0, EINVAL); break; } } static void ipnet_wputnondata(queue_t *q, mblk_t *mp) { union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr; t_uscalar_t prim = dlp->dl_primitive; switch (prim) { case DL_INFO_REQ: ipnet_inforeq(q, mp); break; case DL_UNBIND_REQ: ipnet_unbindreq(q, mp); break; case DL_BIND_REQ: ipnet_bindreq(q, mp); break; case DL_PROMISCON_REQ: ipnet_dlpromisconreq(q, mp); break; case DL_PROMISCOFF_REQ: ipnet_dlpromiscoffreq(q, mp); break; case DL_UNITDATA_REQ: case DL_DETACH_REQ: case DL_PHYS_ADDR_REQ: case DL_SET_PHYS_ADDR_REQ: case DL_ENABMULTI_REQ: case DL_DISABMULTI_REQ: case DL_ATTACH_REQ: dlerrorack(q, mp, prim, DL_UNSUPPORTED, 0); break; default: dlerrorack(q, mp, prim, DL_BADPRIM, 0); break; } } static void ipnet_inforeq(queue_t *q, mblk_t *mp) { dl_info_ack_t *dlip; size_t size = sizeof (dl_info_ack_t) + sizeof (ushort_t); if (MBLKL(mp) < DL_INFO_REQ_SIZE) { dlerrorack(q, mp, DL_INFO_REQ, DL_BADPRIM, 0); return; } if ((mp = mexchange(q, mp, size, M_PCPROTO, DL_INFO_ACK)) == NULL) return; dlip = (dl_info_ack_t *)mp->b_rptr; *dlip = ipnet_infoack; qreply(q, mp); } static void ipnet_bindreq(queue_t *q, mblk_t *mp) { union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr; int32_t sap; ipnet_t *ipnet = q->q_ptr; if (MBLKL(mp) < DL_BIND_REQ_SIZE) { dlerrorack(q, mp, DL_BIND_REQ, DL_BADPRIM, 0); 
return; } sap = dlp->bind_req.dl_sap; if (sap != IPV4_VERSION && sap != IPV6_VERSION && sap != 0) { dlerrorack(q, mp, DL_BIND_REQ, DL_BADSAP, 0); } else { ipnet->ipnet_sap = sap; ipnet->ipnet_dlstate = DL_IDLE; dlbindack(q, mp, sap, 0, 0, 0, 0); } } static void ipnet_unbindreq(queue_t *q, mblk_t *mp) { ipnet_t *ipnet = q->q_ptr; if (MBLKL(mp) < DL_UNBIND_REQ_SIZE) { dlerrorack(q, mp, DL_UNBIND_REQ, DL_BADPRIM, 0); return; } if (ipnet->ipnet_dlstate != DL_IDLE) { dlerrorack(q, mp, DL_UNBIND_REQ, DL_OUTSTATE, 0); } else { ipnet->ipnet_dlstate = DL_UNBOUND; ipnet->ipnet_sap = 0; dlokack(q, mp, DL_UNBIND_REQ); } } static void ipnet_dlpromisconreq(queue_t *q, mblk_t *mp) { ipnet_t *ipnet = q->q_ptr; t_uscalar_t level; int err; if (MBLKL(mp) < DL_PROMISCON_REQ_SIZE) { dlerrorack(q, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0); return; } if (ipnet->ipnet_flags & IPNET_LOMODE) { dlokack(q, mp, DL_PROMISCON_REQ); return; } level = ((dl_promiscon_req_t *)mp->b_rptr)->dl_level; if (level == DL_PROMISC_PHYS || level == DL_PROMISC_MULTI) { if ((err = ipnet_join_allmulti(ipnet->ipnet_if, ipnet->ipnet_ns->netstack_ipnet)) != 0) { dlerrorack(q, mp, DL_PROMISCON_REQ, DL_SYSERR, err); return; } } switch (level) { case DL_PROMISC_PHYS: ipnet->ipnet_flags |= IPNET_PROMISC_PHYS; break; case DL_PROMISC_SAP: ipnet->ipnet_flags |= IPNET_PROMISC_SAP; break; case DL_PROMISC_MULTI: ipnet->ipnet_flags |= IPNET_PROMISC_MULTI; break; default: dlerrorack(q, mp, DL_PROMISCON_REQ, DL_BADPRIM, 0); return; } dlokack(q, mp, DL_PROMISCON_REQ); } static void ipnet_dlpromiscoffreq(queue_t *q, mblk_t *mp) { ipnet_t *ipnet = q->q_ptr; t_uscalar_t level; uint16_t orig_ipnet_flags = ipnet->ipnet_flags; if (MBLKL(mp) < DL_PROMISCOFF_REQ_SIZE) { dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0); return; } if (ipnet->ipnet_flags & IPNET_LOMODE) { dlokack(q, mp, DL_PROMISCOFF_REQ); return; } level = ((dl_promiscon_req_t *)mp->b_rptr)->dl_level; switch (level) { case DL_PROMISC_PHYS: if (ipnet->ipnet_flags & 
IPNET_PROMISC_PHYS) ipnet->ipnet_flags &= ~IPNET_PROMISC_PHYS; break; case DL_PROMISC_SAP: if (ipnet->ipnet_flags & IPNET_PROMISC_SAP) ipnet->ipnet_flags &= ~IPNET_PROMISC_SAP; break; case DL_PROMISC_MULTI: if (ipnet->ipnet_flags & IPNET_PROMISC_MULTI) ipnet->ipnet_flags &= ~IPNET_PROMISC_MULTI; break; default: dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_BADPRIM, 0); return; } if (orig_ipnet_flags == ipnet->ipnet_flags) { dlerrorack(q, mp, DL_PROMISCOFF_REQ, DL_NOTENAB, 0); return; } if (level == DL_PROMISC_PHYS || level == DL_PROMISC_MULTI) { ipnet_leave_allmulti(ipnet->ipnet_if, ipnet->ipnet_ns->netstack_ipnet); } dlokack(q, mp, DL_PROMISCOFF_REQ); } static int ipnet_join_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips) { int err = 0; ip_stack_t *ipst = ips->ips_netstack->netstack_ip; uint64_t index = ipnetif->if_index; mutex_enter(&ips->ips_event_lock); if (ipnetif->if_multicnt == 0) { ASSERT((ipnetif->if_flags & (IPNETIF_IPV4ALLMULTI | IPNETIF_IPV6ALLMULTI)) == 0); if (ipnetif->if_flags & IPNETIF_IPV4PLUMBED) { err = ip_join_allmulti(index, B_FALSE, ipst); if (err != 0) goto done; ipnetif->if_flags |= IPNETIF_IPV4ALLMULTI; } if (ipnetif->if_flags & IPNETIF_IPV6PLUMBED) { err = ip_join_allmulti(index, B_TRUE, ipst); if (err != 0 && (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI)) { (void) ip_leave_allmulti(index, B_FALSE, ipst); ipnetif->if_flags &= ~IPNETIF_IPV4ALLMULTI; goto done; } ipnetif->if_flags |= IPNETIF_IPV6ALLMULTI; } } ipnetif->if_multicnt++; done: mutex_exit(&ips->ips_event_lock); return (err); } static void ipnet_leave_allmulti(ipnetif_t *ipnetif, ipnet_stack_t *ips) { int err; ip_stack_t *ipst = ips->ips_netstack->netstack_ip; uint64_t index = ipnetif->if_index; mutex_enter(&ips->ips_event_lock); ASSERT(ipnetif->if_multicnt != 0); if (--ipnetif->if_multicnt == 0) { if (ipnetif->if_flags & IPNETIF_IPV4ALLMULTI) { err = ip_leave_allmulti(index, B_FALSE, ipst); ASSERT(err == 0 || err == ENODEV); ipnetif->if_flags &= ~IPNETIF_IPV4ALLMULTI; } if 
(ipnetif->if_flags & IPNETIF_IPV6ALLMULTI) { err = ip_leave_allmulti(index, B_TRUE, ipst); ASSERT(err == 0 || err == ENODEV); ipnetif->if_flags &= ~IPNETIF_IPV6ALLMULTI; } } mutex_exit(&ips->ips_event_lock); } static mblk_t * ipnet_addheader(ipobs_hook_data_t *ihd, mblk_t *mp) { mblk_t *dlhdr; dl_ipnetinfo_t *dl; if ((dlhdr = allocb(sizeof (dl_ipnetinfo_t), BPRI_HI)) == NULL) { freemsg(mp); return (NULL); } dl = (dl_ipnetinfo_t *)dlhdr->b_rptr; dl->dli_version = DL_IPNETINFO_VERSION; dl->dli_len = htons(sizeof (*dl)); dl->dli_ipver = ihd->ihd_ipver; dl->dli_srczone = BE_64((uint64_t)ihd->ihd_zsrc); dl->dli_dstzone = BE_64((uint64_t)ihd->ihd_zdst); dlhdr->b_wptr += sizeof (*dl); dlhdr->b_cont = mp; return (dlhdr); } static ipnet_addrtype_t ipnet_get_addrtype(ipnet_t *ipnet, ipnet_addrp_t *addr) { list_t *list; ipnetif_t *ipnetif = ipnet->ipnet_if; ipnetif_addr_t *ifaddr; ipnet_addrtype_t addrtype = IPNETADDR_UNKNOWN; /* First check if the address is multicast or limited broadcast. */ switch (addr->iap_family) { case AF_INET: if (CLASSD(*(addr->iap_addr4)) || *(addr->iap_addr4) == INADDR_BROADCAST) return (IPNETADDR_MBCAST); break; case AF_INET6: if (IN6_IS_ADDR_MULTICAST(addr->iap_addr6)) return (IPNETADDR_MBCAST); break; } /* * Walk the address list to see if the address belongs to our * interface or is one of our subnet broadcast addresses. */ mutex_enter(&ipnetif->if_addr_lock); list = (addr->iap_family == AF_INET) ? &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list; for (ifaddr = list_head(list); ifaddr != NULL && addrtype == IPNETADDR_UNKNOWN; ifaddr = list_next(list, ifaddr)) { /* * If we're not in the global zone, then only look at * addresses in our zone. 
*/ if (ipnet->ipnet_zoneid != GLOBAL_ZONEID && ipnet->ipnet_zoneid != ifaddr->ifa_zone) continue; switch (addr->iap_family) { case AF_INET: if (ifaddr->ifa_ip4addr != INADDR_ANY && *(addr->iap_addr4) == ifaddr->ifa_ip4addr) addrtype = IPNETADDR_MYADDR; else if (ifaddr->ifa_brdaddr != INADDR_ANY && *(addr->iap_addr4) == ifaddr->ifa_brdaddr) addrtype = IPNETADDR_MBCAST; break; case AF_INET6: if (IN6_ARE_ADDR_EQUAL(addr->iap_addr6, &ifaddr->ifa_ip6addr)) addrtype = IPNETADDR_MYADDR; break; } } mutex_exit(&ipnetif->if_addr_lock); return (addrtype); } /* * Verify if the packet contained in ihd should be passed up to the * ipnet client stream. */ static boolean_t ipnet_accept(ipnet_t *ipnet, ipobs_hook_data_t *ihd, ipnet_addrp_t *src, ipnet_addrp_t *dst) { uint64_t ifindex = ipnet->ipnet_if->if_index; ipnet_addrtype_t srctype, dsttype; srctype = ipnet_get_addrtype(ipnet, src); dsttype = ipnet_get_addrtype(ipnet, dst); /* * Do not allow an ipnet stream to see packets that are not from or to * its zone. The exception is when zones are using the shared stack * model. In this case, streams in the global zone have visibility * into other shared-stack zones, and broadcast and multicast traffic * is visible by all zones in the stack. */ if (ipnet->ipnet_zoneid != GLOBAL_ZONEID && dsttype != IPNETADDR_MBCAST) { if (ipnet->ipnet_zoneid != ihd->ihd_zsrc && ipnet->ipnet_zoneid != ihd->ihd_zdst) return (B_FALSE); } /* * If DL_PROMISC_SAP isn't enabled, then the bound SAP must match the * packet's IP version. */ if (!(ipnet->ipnet_flags & IPNET_PROMISC_SAP) && ipnet->ipnet_sap != ihd->ihd_ipver) return (B_FALSE); /* If the destination address is ours, then accept the packet. */ if (dsttype == IPNETADDR_MYADDR) return (B_TRUE); /* * If DL_PROMISC_PHYS is enabled, then we can see all packets that are * sent or received on the interface we're observing, or packets that * have our source address (this allows us to see packets we send). 
*/ if (ipnet->ipnet_flags & IPNET_PROMISC_PHYS) { if (ihd->ihd_ifindex == ifindex || srctype == IPNETADDR_MYADDR) return (B_TRUE); } /* * We accept multicast and broadcast packets transmitted or received * on the interface we're observing. */ if (dsttype == IPNETADDR_MBCAST && ihd->ihd_ifindex == ifindex) return (B_TRUE); return (B_FALSE); } /* * Verify if the packet contained in ihd should be passed up to the ipnet * client stream that's in IPNET_LOMODE. */ /* ARGSUSED */ static boolean_t ipnet_loaccept(ipnet_t *ipnet, ipobs_hook_data_t *ihd, ipnet_addrp_t *src, ipnet_addrp_t *dst) { if (ihd->ihd_htype != IPOBS_HOOK_LOCAL) return (B_FALSE); /* * An ipnet stream must not see packets that are not from/to its zone. */ if (ipnet->ipnet_zoneid != GLOBAL_ZONEID) { if (ipnet->ipnet_zoneid != ihd->ihd_zsrc && ipnet->ipnet_zoneid != ihd->ihd_zdst) return (B_FALSE); } return (ipnet->ipnet_sap == 0 || ipnet->ipnet_sap == ihd->ihd_ipver); } static void ipnet_dispatch(void *arg) { mblk_t *mp = arg; ipobs_hook_data_t *ihd = (ipobs_hook_data_t *)mp->b_rptr; ipnet_t *ipnet; mblk_t *netmp; list_t *list; ipnet_stack_t *ips = ihd->ihd_stack->netstack_ipnet; ipnet_addrp_t src, dst; if (ihd->ihd_ipver == IPV4_VERSION) { src.iap_family = dst.iap_family = AF_INET; src.iap_addr4 = &((ipha_t *)(ihd->ihd_mp->b_rptr))->ipha_src; dst.iap_addr4 = &((ipha_t *)(ihd->ihd_mp->b_rptr))->ipha_dst; } else { src.iap_family = dst.iap_family = AF_INET6; src.iap_addr6 = &((ip6_t *)(ihd->ihd_mp->b_rptr))->ip6_src; dst.iap_addr6 = &((ip6_t *)(ihd->ihd_mp->b_rptr))->ip6_dst; } ipnet_walkers_inc(ips); list = &ips->ips_str_list; for (ipnet = list_head(list); ipnet != NULL; ipnet = list_next(list, ipnet)) { if (!(*ipnet->ipnet_acceptfn)(ipnet, ihd, &src, &dst)) continue; if (list_next(list, ipnet) == NULL) { netmp = ihd->ihd_mp; ihd->ihd_mp = NULL; } else { if ((netmp = dupmsg(ihd->ihd_mp)) == NULL && (netmp = copymsg(ihd->ihd_mp)) == NULL) { atomic_inc_64(&ips->ips_drops); continue; } } if 
(ipnet->ipnet_flags & IPNET_INFO) { if ((netmp = ipnet_addheader(ihd, netmp)) == NULL) { atomic_inc_64(&ips->ips_drops); continue; } } if (ipnet->ipnet_rq->q_first == NULL && canputnext(ipnet->ipnet_rq)) { putnext(ipnet->ipnet_rq, netmp); } else if (canput(ipnet->ipnet_rq)) { (void) putq(ipnet->ipnet_rq, netmp); } else { freemsg(netmp); atomic_inc_64(&ips->ips_drops); } } ipnet_walkers_dec(ips); freemsg(ihd->ihd_mp); freemsg(mp); } static void ipnet_input(mblk_t *mp) { ipobs_hook_data_t *ihd = (ipobs_hook_data_t *)mp->b_rptr; if (ddi_taskq_dispatch(ipnet_taskq, ipnet_dispatch, mp, DDI_NOSLEEP) != DDI_SUCCESS) { atomic_inc_64(&ihd->ihd_stack->netstack_ipnet->ips_drops); freemsg(ihd->ihd_mp); freemsg(mp); } } /* * Create a new ipnetif_t and new minor node for it. If creation is * successful the new ipnetif_t is inserted into an avl_tree * containing ipnetif's for this stack instance. */ static ipnetif_t * ipnet_create_if(const char *name, uint64_t index, ipnet_stack_t *ips) { ipnetif_t *ipnetif; avl_index_t where = 0; minor_t ifminor; /* * Because ipnet_create_if() can be called from a NIC event * callback, it should not block. 
*/ ifminor = (minor_t)id_alloc_nosleep(ipnet_minor_space); if (ifminor == (minor_t)-1) return (NULL); if ((ipnetif = kmem_zalloc(sizeof (*ipnetif), KM_NOSLEEP)) == NULL) { id_free(ipnet_minor_space, ifminor); return (NULL); } (void) strlcpy(ipnetif->if_name, name, LIFNAMSIZ); ipnetif->if_index = index; mutex_init(&ipnetif->if_addr_lock, NULL, MUTEX_DEFAULT, 0); list_create(&ipnetif->if_ip4addr_list, sizeof (ipnetif_addr_t), offsetof(ipnetif_addr_t, ifa_link)); list_create(&ipnetif->if_ip6addr_list, sizeof (ipnetif_addr_t), offsetof(ipnetif_addr_t, ifa_link)); ipnetif->if_dev = makedevice(ipnet_major, ifminor); mutex_init(&ipnetif->if_reflock, NULL, MUTEX_DEFAULT, 0); ipnetif->if_refcnt = 1; mutex_enter(&ips->ips_avl_lock); VERIFY(avl_find(&ips->ips_avl_by_index, &index, &where) == NULL); avl_insert(&ips->ips_avl_by_index, ipnetif, where); VERIFY(avl_find(&ips->ips_avl_by_name, (void *)name, &where) == NULL); avl_insert(&ips->ips_avl_by_name, ipnetif, where); mutex_exit(&ips->ips_avl_lock); return (ipnetif); } static void ipnet_remove_if(ipnetif_t *ipnetif, ipnet_stack_t *ips) { ipnet_t *ipnet; ipnet_walkers_inc(ips); /* Send a SIGHUP to all open streams associated with this ipnetif. */ for (ipnet = list_head(&ips->ips_str_list); ipnet != NULL; ipnet = list_next(&ips->ips_str_list, ipnet)) { if (ipnet->ipnet_if == ipnetif) (void) putnextctl(ipnet->ipnet_rq, M_HANGUP); } ipnet_walkers_dec(ips); mutex_enter(&ips->ips_avl_lock); avl_remove(&ips->ips_avl_by_index, ipnetif); avl_remove(&ips->ips_avl_by_name, ipnetif); mutex_exit(&ips->ips_avl_lock); /* Release the reference we implicitly held in ipnet_create_if(). 
*/ ipnetif_refrele(ipnetif); } static void ipnet_purge_addrlist(list_t *addrlist) { ipnetif_addr_t *ifa; while ((ifa = list_head(addrlist)) != NULL) { list_remove(addrlist, ifa); kmem_free(ifa, sizeof (*ifa)); } } static void ipnet_free_if(ipnetif_t *ipnetif) { ASSERT(ipnetif->if_refcnt == 0); /* Remove IPv4/v6 address lists from the ipnetif */ ipnet_purge_addrlist(&ipnetif->if_ip4addr_list); list_destroy(&ipnetif->if_ip4addr_list); ipnet_purge_addrlist(&ipnetif->if_ip6addr_list); list_destroy(&ipnetif->if_ip6addr_list); mutex_destroy(&ipnetif->if_addr_lock); mutex_destroy(&ipnetif->if_reflock); id_free(ipnet_minor_space, getminor(ipnetif->if_dev)); kmem_free(ipnetif, sizeof (*ipnetif)); } /* * Create an ipnetif_addr_t with the given logical interface id (lif) * and add it to the supplied ipnetif. The lif is the netinfo * representation of logical interface id, and we use this id to match * incoming netinfo events against our lists of addresses. */ static void ipnet_add_ifaddr(uint64_t lif, ipnetif_t *ipnetif, net_handle_t nd) { ipnetif_addr_t *ifaddr; zoneid_t zoneid; struct sockaddr_in bcast; struct sockaddr_storage addr; net_ifaddr_t type = NA_ADDRESS; uint64_t phyif = ipnetif->if_index; if (net_getlifaddr(nd, phyif, lif, 1, &type, &addr) != 0 || net_getlifzone(nd, phyif, lif, &zoneid) != 0) return; if ((ifaddr = kmem_alloc(sizeof (*ifaddr), KM_NOSLEEP)) == NULL) return; ifaddr->ifa_zone = zoneid; ifaddr->ifa_id = lif; switch (addr.ss_family) { case AF_INET: ifaddr->ifa_ip4addr = ((struct sockaddr_in *)&addr)->sin_addr.s_addr; /* * Try and get the broadcast address. Note that it's okay for * an interface to not have a broadcast address, so we don't * fail the entire operation if net_getlifaddr() fails here. 
*/ type = NA_BROADCAST; if (net_getlifaddr(nd, phyif, lif, 1, &type, &bcast) == 0) ifaddr->ifa_brdaddr = bcast.sin_addr.s_addr; break; case AF_INET6: ifaddr->ifa_ip6addr = ((struct sockaddr_in6 *)&addr)->sin6_addr; break; } mutex_enter(&ipnetif->if_addr_lock); list_insert_tail(addr.ss_family == AF_INET ? &ipnetif->if_ip4addr_list : &ipnetif->if_ip6addr_list, ifaddr); mutex_exit(&ipnetif->if_addr_lock); } static void ipnet_delete_ifaddr(ipnetif_addr_t *ifaddr, ipnetif_t *ipnetif, boolean_t isv6) { mutex_enter(&ipnetif->if_addr_lock); list_remove(isv6 ? &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list, ifaddr); mutex_exit(&ipnetif->if_addr_lock); kmem_free(ifaddr, sizeof (*ifaddr)); } static void ipnet_plumb_ev(uint64_t ifindex, const char *ifname, ipnet_stack_t *ips, boolean_t isv6) { ipnetif_t *ipnetif; boolean_t refrele_needed = B_TRUE; if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL) { ipnetif = ipnet_create_if(ifname, ifindex, ips); refrele_needed = B_FALSE; } if (ipnetif != NULL) { ipnetif->if_flags |= isv6 ? IPNETIF_IPV6PLUMBED : IPNETIF_IPV4PLUMBED; } if (ipnetif->if_multicnt != 0) { if (ip_join_allmulti(ifindex, isv6, ips->ips_netstack->netstack_ip) == 0) { ipnetif->if_flags |= isv6 ? IPNETIF_IPV6ALLMULTI : IPNETIF_IPV4ALLMULTI; } } if (refrele_needed) ipnetif_refrele(ipnetif); } static void ipnet_unplumb_ev(uint64_t ifindex, ipnet_stack_t *ips, boolean_t isv6) { ipnetif_t *ipnetif; if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL) return; mutex_enter(&ipnetif->if_addr_lock); ipnet_purge_addrlist(isv6 ? &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list); mutex_exit(&ipnetif->if_addr_lock); /* * Note that we have one ipnetif for both IPv4 and IPv6, but we receive * separate NE_UNPLUMB events for IPv4 and IPv6. We remove the ipnetif * if both IPv4 and IPv6 interfaces have been unplumbed. */ ipnetif->if_flags &= isv6 ? 
~IPNETIF_IPV6PLUMBED : ~IPNETIF_IPV4PLUMBED; if (!(ipnetif->if_flags & (IPNETIF_IPV4PLUMBED | IPNETIF_IPV6PLUMBED))) ipnet_remove_if(ipnetif, ips); ipnetif_refrele(ipnetif); } static void ipnet_lifup_ev(uint64_t ifindex, uint64_t lifindex, net_handle_t nd, ipnet_stack_t *ips, boolean_t isv6) { ipnetif_t *ipnetif; ipnetif_addr_t *ifaddr; if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL) return; if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL) { /* * We must have missed a NE_LIF_DOWN event. Delete this * ifaddr and re-create it. */ ipnet_delete_ifaddr(ifaddr, ipnetif, isv6); } ipnet_add_ifaddr(lifindex, ipnetif, nd); ipnetif_refrele(ipnetif); } static void ipnet_lifdown_ev(uint64_t ifindex, uint64_t lifindex, ipnet_stack_t *ips, boolean_t isv6) { ipnetif_t *ipnetif; ipnetif_addr_t *ifaddr; if ((ipnetif = ipnet_if_getby_index(ifindex, ips)) == NULL) return; if ((ifaddr = ipnet_match_lif(ipnetif, lifindex, isv6)) != NULL) ipnet_delete_ifaddr(ifaddr, ipnetif, isv6); ipnetif_refrele(ipnetif); /* * Make sure that open streams on this ipnetif are still allowed to * have it open. */ ipnet_if_zonecheck(ipnetif, ips); } /* * This callback from the NIC event framework dispatches a taskq as the event * handlers may block. 
*/ /* ARGSUSED */ static int ipnet_nicevent_cb(hook_event_token_t token, hook_data_t info, void *arg) { ipnet_stack_t *ips = arg; hook_nic_event_t *hn = (hook_nic_event_t *)info; ipnet_nicevent_t *ipne; if ((ipne = kmem_alloc(sizeof (ipnet_nicevent_t), KM_NOSLEEP)) == NULL) return (0); ipne->ipne_event = hn->hne_event; ipne->ipne_protocol = hn->hne_protocol; ipne->ipne_stackid = ips->ips_netstack->netstack_stackid; ipne->ipne_ifindex = hn->hne_nic; ipne->ipne_lifindex = hn->hne_lif; if (hn->hne_datalen != 0) { (void) strlcpy(ipne->ipne_ifname, hn->hne_data, sizeof (ipne->ipne_ifname)); } (void) ddi_taskq_dispatch(ipnet_nicevent_taskq, ipnet_nicevent_task, ipne, DDI_NOSLEEP); return (0); } static void ipnet_nicevent_task(void *arg) { ipnet_nicevent_t *ipne = arg; netstack_t *ns; ipnet_stack_t *ips; boolean_t isv6; if ((ns = netstack_find_by_stackid(ipne->ipne_stackid)) == NULL) goto done; ips = ns->netstack_ipnet; isv6 = (ipne->ipne_protocol == ips->ips_ndv6); mutex_enter(&ips->ips_event_lock); switch (ipne->ipne_event) { case NE_PLUMB: ipnet_plumb_ev(ipne->ipne_ifindex, ipne->ipne_ifname, ips, isv6); break; case NE_UNPLUMB: ipnet_unplumb_ev(ipne->ipne_ifindex, ips, isv6); break; case NE_LIF_UP: ipnet_lifup_ev(ipne->ipne_ifindex, ipne->ipne_lifindex, ipne->ipne_protocol, ips, isv6); break; case NE_LIF_DOWN: ipnet_lifdown_ev(ipne->ipne_ifindex, ipne->ipne_lifindex, ips, isv6); break; default: break; } mutex_exit(&ips->ips_event_lock); done: if (ns != NULL) netstack_rele(ns); kmem_free(ipne, sizeof (ipnet_nicevent_t)); } dev_t ipnet_if_getdev(char *name, zoneid_t zoneid) { netstack_t *ns; ipnet_stack_t *ips; ipnetif_t *ipnetif; dev_t dev = (dev_t)-1; if (is_system_labeled() && zoneid != GLOBAL_ZONEID) return (dev); if ((ns = netstack_find_by_zoneid(zoneid)) == NULL) return (dev); ips = ns->netstack_ipnet; mutex_enter(&ips->ips_avl_lock); if ((ipnetif = avl_find(&ips->ips_avl_by_name, name, NULL)) != NULL) { if (ipnet_if_in_zone(ipnetif, zoneid, ips)) dev = 
ipnetif->if_dev; } mutex_exit(&ips->ips_avl_lock); netstack_rele(ns); return (dev); } static ipnetif_t * ipnet_if_getby_index(uint64_t id, ipnet_stack_t *ips) { ipnetif_t *ipnetif; mutex_enter(&ips->ips_avl_lock); if ((ipnetif = avl_find(&ips->ips_avl_by_index, &id, NULL)) != NULL) ipnetif_refhold(ipnetif); mutex_exit(&ips->ips_avl_lock); return (ipnetif); } static ipnetif_t * ipnet_if_getby_dev(dev_t dev, ipnet_stack_t *ips) { ipnetif_t *ipnetif; avl_tree_t *tree; mutex_enter(&ips->ips_avl_lock); tree = &ips->ips_avl_by_index; for (ipnetif = avl_first(tree); ipnetif != NULL; ipnetif = avl_walk(tree, ipnetif, AVL_AFTER)) { if (ipnetif->if_dev == dev) { ipnetif_refhold(ipnetif); break; } } mutex_exit(&ips->ips_avl_lock); return (ipnetif); } static ipnetif_addr_t * ipnet_match_lif(ipnetif_t *ipnetif, lif_if_t lid, boolean_t isv6) { ipnetif_addr_t *ifaddr; list_t *list; mutex_enter(&ipnetif->if_addr_lock); list = isv6 ? &ipnetif->if_ip6addr_list : &ipnetif->if_ip4addr_list; for (ifaddr = list_head(list); ifaddr != NULL; ifaddr = list_next(list, ifaddr)) { if (lid == ifaddr->ifa_id) break; } mutex_exit(&ipnetif->if_addr_lock); return (ifaddr); } /* ARGSUSED */ static void * ipnet_stack_init(netstackid_t stackid, netstack_t *ns) { ipnet_stack_t *ips; ips = kmem_zalloc(sizeof (*ips), KM_SLEEP); ips->ips_netstack = ns; mutex_init(&ips->ips_avl_lock, NULL, MUTEX_DEFAULT, 0); avl_create(&ips->ips_avl_by_index, ipnet_if_compare_index, sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_index)); avl_create(&ips->ips_avl_by_name, ipnet_if_compare_name, sizeof (ipnetif_t), offsetof(ipnetif_t, if_avl_by_name)); mutex_init(&ips->ips_walkers_lock, NULL, MUTEX_DEFAULT, NULL); cv_init(&ips->ips_walkers_cv, NULL, CV_DRIVER, NULL); list_create(&ips->ips_str_list, sizeof (ipnet_t), offsetof(ipnet_t, ipnet_next)); ipnet_register_netihook(ips); return (ips); } /* ARGSUSED */ static void ipnet_stack_fini(netstackid_t stackid, void *arg) { ipnet_stack_t *ips = arg; ipnetif_t *ipnetif, 
*nipnetif; if (ips->ips_ndv4 != NULL) { VERIFY(net_hook_unregister(ips->ips_ndv4, NH_NIC_EVENTS, ips->ips_nicevents) == 0); VERIFY(net_protocol_release(ips->ips_ndv4) == 0); } if (ips->ips_ndv6 != NULL) { VERIFY(net_hook_unregister(ips->ips_ndv6, NH_NIC_EVENTS, ips->ips_nicevents) == 0); VERIFY(net_protocol_release(ips->ips_ndv6) == 0); } hook_free(ips->ips_nicevents); for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL; ipnetif = nipnetif) { nipnetif = AVL_NEXT(&ips->ips_avl_by_index, ipnetif); ipnet_remove_if(ipnetif, ips); } avl_destroy(&ips->ips_avl_by_index); avl_destroy(&ips->ips_avl_by_name); mutex_destroy(&ips->ips_avl_lock); mutex_destroy(&ips->ips_walkers_lock); cv_destroy(&ips->ips_walkers_cv); list_destroy(&ips->ips_str_list); kmem_free(ips, sizeof (*ips)); } /* Do any of the addresses in addrlist belong the supplied zoneid? */ static boolean_t ipnet_addrs_in_zone(list_t *addrlist, zoneid_t zoneid) { ipnetif_addr_t *ifa; for (ifa = list_head(addrlist); ifa != NULL; ifa = list_next(addrlist, ifa)) { if (ifa->ifa_zone == zoneid) return (B_TRUE); } return (B_FALSE); } /* Should the supplied ipnetif be visible from the supplied zoneid? */ static boolean_t ipnet_if_in_zone(ipnetif_t *ipnetif, zoneid_t zoneid, ipnet_stack_t *ips) { int ret; /* * The global zone has visibility into all interfaces in the global * stack, and exclusive stack zones have visibility into all * interfaces in their stack. */ if (zoneid == GLOBAL_ZONEID || ips->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID) return (B_TRUE); /* * Shared-stack zones only have visibility for interfaces that have * addresses in their zone. */ mutex_enter(&ipnetif->if_addr_lock); ret = ipnet_addrs_in_zone(&ipnetif->if_ip4addr_list, zoneid) || ipnet_addrs_in_zone(&ipnetif->if_ip6addr_list, zoneid); mutex_exit(&ipnetif->if_addr_lock); return (ret); } /* * Verify that any ipnet_t that has a reference to the supplied ipnetif should * still be allowed to have it open. 
A given ipnet_t may no longer be allowed * to have an ipnetif open if there are no longer any addresses that belong to * the ipnetif in the ipnet_t's non-global shared-stack zoneid. If that's the * case, send the ipnet_t an M_HANGUP. */ static void ipnet_if_zonecheck(ipnetif_t *ipnetif, ipnet_stack_t *ips) { list_t *strlist = &ips->ips_str_list; ipnet_t *ipnet; ipnet_walkers_inc(ips); for (ipnet = list_head(strlist); ipnet != NULL; ipnet = list_next(strlist, ipnet)) { if (ipnet->ipnet_if != ipnetif) continue; if (!ipnet_if_in_zone(ipnetif, ipnet->ipnet_zoneid, ips)) (void) putnextctl(ipnet->ipnet_rq, M_HANGUP); } ipnet_walkers_dec(ips); } void ipnet_walk_if(ipnet_walkfunc_t *cb, void *arg, zoneid_t zoneid) { ipnetif_t *ipnetif; list_t cbdata; ipnetif_cbdata_t *cbnode; netstack_t *ns; ipnet_stack_t *ips; /* * On labeled systems, non-global zones shouldn't see anything * in /dev/ipnet. */ if (is_system_labeled() && zoneid != GLOBAL_ZONEID) return; if ((ns = netstack_find_by_zoneid(zoneid)) == NULL) return; ips = ns->netstack_ipnet; list_create(&cbdata, sizeof (ipnetif_cbdata_t), offsetof(ipnetif_cbdata_t, ic_next)); mutex_enter(&ips->ips_avl_lock); for (ipnetif = avl_first(&ips->ips_avl_by_index); ipnetif != NULL; ipnetif = avl_walk(&ips->ips_avl_by_index, ipnetif, AVL_AFTER)) { if (!ipnet_if_in_zone(ipnetif, zoneid, ips)) continue; cbnode = kmem_zalloc(sizeof (ipnetif_cbdata_t), KM_SLEEP); (void) strlcpy(cbnode->ic_ifname, ipnetif->if_name, LIFNAMSIZ); cbnode->ic_dev = ipnetif->if_dev; list_insert_head(&cbdata, cbnode); } mutex_exit(&ips->ips_avl_lock); while ((cbnode = list_head(&cbdata)) != NULL) { cb(cbnode->ic_ifname, arg, cbnode->ic_dev); list_remove(&cbdata, cbnode); kmem_free(cbnode, sizeof (ipnetif_cbdata_t)); } list_destroy(&cbdata); netstack_rele(ns); } static int ipnet_if_compare_index(const void *index_ptr, const void *ipnetifp) { int64_t index1 = *((int64_t *)index_ptr); int64_t index2 = (int64_t)((ipnetif_t *)ipnetifp)->if_index; return (SIGNOF(index2 
- index1)); } static int ipnet_if_compare_name(const void *name_ptr, const void *ipnetifp) { int res; res = strcmp(((ipnetif_t *)ipnetifp)->if_name, name_ptr); return (SIGNOF(res)); } static void ipnetif_refhold(ipnetif_t *ipnetif) { mutex_enter(&ipnetif->if_reflock); ipnetif->if_refcnt++; mutex_exit(&ipnetif->if_reflock); } static void ipnetif_refrele(ipnetif_t *ipnetif) { mutex_enter(&ipnetif->if_reflock); ASSERT(ipnetif->if_refcnt != 0); if (--ipnetif->if_refcnt == 0) ipnet_free_if(ipnetif); else mutex_exit(&ipnetif->if_reflock); } static void ipnet_walkers_inc(ipnet_stack_t *ips) { mutex_enter(&ips->ips_walkers_lock); ips->ips_walkers_cnt++; mutex_exit(&ips->ips_walkers_lock); } static void ipnet_walkers_dec(ipnet_stack_t *ips) { mutex_enter(&ips->ips_walkers_lock); ASSERT(ips->ips_walkers_cnt != 0); if (--ips->ips_walkers_cnt == 0) cv_broadcast(&ips->ips_walkers_cv); mutex_exit(&ips->ips_walkers_lock); }