/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include <sys/types.h> #include <sys/sysmacros.h> #include <sys/conf.h> #include <sys/cmn_err.h> #include <sys/list.h> #include <sys/kmem.h> #include <sys/stream.h> #include <sys/modctl.h> #include <sys/ddi.h> #include <sys/sunddi.h> #include <sys/atomic.h> #include <sys/stat.h> #include <sys/modhash.h> #include <sys/strsubr.h> #include <sys/strsun.h> #include <sys/sdt.h> #include <sys/mac.h> #include <sys/mac_impl.h> #include <sys/mac_client_impl.h> #include <sys/mac_client_priv.h> #include <sys/mac_flow_impl.h> /* * Broadcast and multicast traffic must be distributed to the MAC clients * that are defined on top of the same MAC. The set of * destinations to which a multicast packet must be sent is a subset * of all MAC clients defined on top of the MAC. A MAC client can be member * of more than one such subset. * * To accomodate these requirements, we introduce broadcast groups. * A broadcast group is associated with a broadcast or multicast * address. The members of a broadcast group consist of the MAC clients * that should received copies of packets sent to the address * associated with the group, and are defined on top of the * same MAC. * * The broadcast groups defined on top of a MAC are chained, * hanging off the mac_impl_t. The broadcast group id's are * unique globally (tracked by mac_bcast_id). */ /* * The same MAC client may be added for different <addr,vid> tuple, * we maintain a ref count for the number of times it has been added * to account for deleting the MAC client from the group. */ typedef struct mac_bcast_grp_mcip_s { mac_client_impl_t *mgb_client; int mgb_client_ref; } mac_bcast_grp_mcip_t; typedef struct mac_bcast_grp_s { /* Protected by */ struct mac_bcast_grp_s *mbg_next; /* SL */ void *mbg_addr; /* SL */ uint16_t mbg_vid; /* SL */ mac_impl_t *mbg_mac_impl; /* WO */ mac_addrtype_t mbg_addrtype; /* WO */ flow_entry_t *mbg_flow_ent; /* WO */ mac_bcast_grp_mcip_t *mbg_clients; /* mi_rw_lock */ uint_t mbg_nclients; /* mi_rw_lock */ uint_t mbg_nclients_alloc; /* SL */ uint64_t mbg_clients_gen; /* mi_rw_lock */ uint32_t mbg_id; /* atomic */ } mac_bcast_grp_t; static kmem_cache_t *mac_bcast_grp_cache; static uint32_t mac_bcast_id = 0; void mac_bcast_init(void) { mac_bcast_grp_cache = kmem_cache_create("mac_bcast_grp_cache", sizeof (mac_bcast_grp_t), 0, NULL, NULL, NULL, NULL, NULL, 0); } void mac_bcast_fini(void) { kmem_cache_destroy(mac_bcast_grp_cache); } mac_impl_t * mac_bcast_grp_mip(void *grp) { mac_bcast_grp_t *bcast_grp = grp; return (bcast_grp->mbg_mac_impl); } /* * Free the specific broadcast group. Invoked when the last reference * to the group is released. */ void mac_bcast_grp_free(void *bcast_grp) { mac_bcast_grp_t *grp = bcast_grp; mac_impl_t *mip = grp->mbg_mac_impl; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); ASSERT(grp->mbg_addr != NULL); kmem_free(grp->mbg_addr, mip->mi_type->mt_addr_length); kmem_free(grp->mbg_clients, grp->mbg_nclients_alloc * sizeof (mac_bcast_grp_mcip_t)); mip->mi_bcast_ngrps--; kmem_cache_free(mac_bcast_grp_cache, grp); } /* * arg1: broadcast group * arg2: sender MAC client if it is being sent by a MAC client, * NULL if it was received from the wire. */ void mac_bcast_send(void *arg1, void *arg2, mblk_t *mp_chain, boolean_t is_loopback) { mac_bcast_grp_t *grp = arg1; mac_client_impl_t *src_mcip = arg2, *dst_mcip; mac_impl_t *mip = grp->mbg_mac_impl; uint64_t gen; uint_t i; mblk_t *mp_chain1; flow_entry_t *flent; int err; rw_enter(&mip->mi_rw_lock, RW_READER); /* * Pass a copy of the mp chain to every MAC client except the sender * MAC client, if the packet was not received from the underlying NIC. * * The broadcast group lock should not be held across calls to * the flow's callback function, since the same group could * potentially be accessed from the same context. When the lock * is reacquired, changes to the broadcast group while the lock * was released are caught using a generation counter incremented * each time the list of MAC clients associated with the broadcast * group is changed. */ for (i = 0; i < grp->mbg_nclients_alloc; i++) { dst_mcip = grp->mbg_clients[i].mgb_client; if (dst_mcip == NULL) continue; flent = dst_mcip->mci_flent; if (flent == NULL || dst_mcip == src_mcip) { /* * Don't send a copy of the packet back to * its sender. */ continue; } /* * It is important to hold a reference on the * flow_ent here. */ if ((mp_chain1 = mac_copymsgchain_cksum(mp_chain)) == NULL) break; /* * Fix the checksum for packets originating * from the local machine. */ if ((src_mcip != NULL) && (mp_chain1 = mac_fix_cksum(mp_chain1)) == NULL) break; FLOW_TRY_REFHOLD(flent, err); if (err != 0) { freemsgchain(mp_chain1); continue; } gen = grp->mbg_clients_gen; rw_exit(&mip->mi_rw_lock); DTRACE_PROBE4(mac__bcast__send__to, mac_client_impl_t *, src_mcip, flow_fn_t, dst_mcip->mci_flent->fe_cb_fn, void *, dst_mcip->mci_flent->fe_cb_arg1, void *, dst_mcip->mci_flent->fe_cb_arg2); (dst_mcip->mci_flent->fe_cb_fn)(dst_mcip->mci_flent->fe_cb_arg1, dst_mcip->mci_flent->fe_cb_arg2, mp_chain1, is_loopback); FLOW_REFRELE(flent); rw_enter(&mip->mi_rw_lock, RW_READER); /* update stats */ if (grp->mbg_addrtype == MAC_ADDRTYPE_MULTICAST) dst_mcip->mci_stat_multircv++; else dst_mcip->mci_stat_brdcstrcv++; if (grp->mbg_clients_gen != gen) { /* * The list of MAC clients associated with the group * was changed while the lock was released. * Give up on the current packet. */ rw_exit(&mip->mi_rw_lock); freemsgchain(mp_chain); return; } } rw_exit(&mip->mi_rw_lock); if (src_mcip != NULL) { /* * The packet was sent from one of the MAC clients, * so we need to send a copy of the packet to the * underlying NIC so that it can be sent on the wire. */ mblk_t *rest; src_mcip->mci_stat_multixmt++; src_mcip->mci_stat_brdcstxmt++; rest = MAC_RING_TX_DEFAULT(mip, mp_chain); if (rest != NULL) freemsgchain(rest); } else { freemsgchain(mp_chain); } } /* * Add the specified MAC client to the group corresponding to the specified * broadcast or multicast address. * Return 0 on success, or an errno value on failure. */ int mac_bcast_add(mac_client_impl_t *mcip, const uint8_t *addr, uint16_t vid, mac_addrtype_t addrtype) { mac_impl_t *mip = mcip->mci_mip; mac_bcast_grp_t *grp = NULL, **last_grp; size_t addr_len = mip->mi_type->mt_addr_length; int rc = 0; int i, index = -1; mac_mcast_addrs_t **prev_mi_addr = NULL; mac_mcast_addrs_t **prev_mci_addr = NULL; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); ASSERT(addrtype == MAC_ADDRTYPE_MULTICAST || addrtype == MAC_ADDRTYPE_BROADCAST); /* * Add the MAC client to the list of MAC clients associated * with the group. */ if (addrtype == MAC_ADDRTYPE_MULTICAST) { mac_mcast_addrs_t *maddr; /* * In case of a driver (say aggr), we need this information * on a per MAC instance basis. */ prev_mi_addr = &mip->mi_mcast_addrs; for (maddr = *prev_mi_addr; maddr != NULL; prev_mi_addr = &maddr->mma_next, maddr = maddr->mma_next) { if (bcmp(maddr->mma_addr, addr, addr_len) == 0) break; } if (maddr == NULL) { /* * For multicast addresses, have the underlying MAC * join the corresponding multicast group. */ rc = mip->mi_multicst(mip->mi_driver, B_TRUE, addr); if (rc != 0) return (rc); maddr = kmem_zalloc(sizeof (mac_mcast_addrs_t), KM_SLEEP); bcopy(addr, maddr->mma_addr, addr_len); *prev_mi_addr = maddr; } else { prev_mi_addr = NULL; } maddr->mma_ref++; /* * We maintain a separate list for each MAC client. Get * the entry or add, if it is not present. */ prev_mci_addr = &mcip->mci_mcast_addrs; for (maddr = *prev_mci_addr; maddr != NULL; prev_mci_addr = &maddr->mma_next, maddr = maddr->mma_next) { if (bcmp(maddr->mma_addr, addr, addr_len) == 0) break; } if (maddr == NULL) { maddr = kmem_zalloc(sizeof (mac_mcast_addrs_t), KM_SLEEP); bcopy(addr, maddr->mma_addr, addr_len); *prev_mci_addr = maddr; } else { prev_mci_addr = NULL; } maddr->mma_ref++; } /* The list is protected by the perimeter */ last_grp = &mip->mi_bcast_grp; for (grp = *last_grp; grp != NULL; last_grp = &grp->mbg_next, grp = grp->mbg_next) { if (bcmp(grp->mbg_addr, addr, addr_len) == 0 && grp->mbg_vid == vid) break; } if (grp == NULL) { /* * The group does not yet exist, create it. */ flow_desc_t flow_desc; char flow_name[MAXFLOWNAMELEN]; grp = kmem_cache_alloc(mac_bcast_grp_cache, KM_SLEEP); bzero(grp, sizeof (mac_bcast_grp_t)); grp->mbg_next = NULL; grp->mbg_mac_impl = mip; DTRACE_PROBE1(mac__bcast__add__new__group, mac_bcast_grp_t *, grp); grp->mbg_addr = kmem_zalloc(addr_len, KM_SLEEP); bcopy(addr, grp->mbg_addr, addr_len); grp->mbg_addrtype = addrtype; grp->mbg_vid = vid; /* * Add a new flow to the underlying MAC. */ bzero(&flow_desc, sizeof (flow_desc)); bcopy(addr, &flow_desc.fd_dst_mac, addr_len); flow_desc.fd_mac_len = (uint32_t)addr_len; flow_desc.fd_mask = FLOW_LINK_DST; if (vid != 0) { flow_desc.fd_vid = vid; flow_desc.fd_mask |= FLOW_LINK_VID; } grp->mbg_id = atomic_add_32_nv(&mac_bcast_id, 1); (void) sprintf(flow_name, "mac/%s/mcast%d", mip->mi_name, grp->mbg_id); rc = mac_flow_create(&flow_desc, NULL, flow_name, grp, FLOW_MCAST, &grp->mbg_flow_ent); if (rc != 0) { kmem_free(grp->mbg_addr, addr_len); kmem_cache_free(mac_bcast_grp_cache, grp); goto fail; } grp->mbg_flow_ent->fe_mbg = grp; mip->mi_bcast_ngrps++; /* * Initial creation reference on the flow. This is released * in the corresponding delete action i_mac_bcast_delete() */ FLOW_REFHOLD(grp->mbg_flow_ent); /* * When the multicast and broadcast packet is received * by the underlying NIC, mac_rx_classify() will invoke * mac_bcast_send() with arg2=NULL, which will cause * mac_bcast_send() to send a copy of the packet(s) * to every MAC client opened on top of the underlying MAC. * * When the mac_bcast_send() function is invoked from * the transmit path of a MAC client, it will specify the * transmitting MAC client as the arg2 value, which will * allow mac_bcast_send() to skip that MAC client and not * send it a copy of the packet. * * We program the classifier to dispatch matching broadcast * packets to mac_bcast_send(). */ grp->mbg_flow_ent->fe_cb_fn = mac_bcast_send; grp->mbg_flow_ent->fe_cb_arg1 = grp; grp->mbg_flow_ent->fe_cb_arg2 = NULL; rc = mac_flow_add(mip->mi_flow_tab, grp->mbg_flow_ent); if (rc != 0) { FLOW_FINAL_REFRELE(grp->mbg_flow_ent); goto fail; } *last_grp = grp; } ASSERT(grp->mbg_addrtype == addrtype); /* * Add the MAC client to the list of MAC clients associated * with the group. */ rw_enter(&mip->mi_rw_lock, RW_WRITER); for (i = 0; i < grp->mbg_nclients_alloc; i++) { /* * The MAC client was already added, say when we have * different unicast addresses with the same vid. * Just increment the ref and we are done. */ if (grp->mbg_clients[i].mgb_client == mcip) { grp->mbg_clients[i].mgb_client_ref++; rw_exit(&mip->mi_rw_lock); return (0); } else if (grp->mbg_clients[i].mgb_client == NULL && index == -1) { index = i; } } if (grp->mbg_nclients_alloc == grp->mbg_nclients) { mac_bcast_grp_mcip_t *new_clients; uint_t new_size = grp->mbg_nclients+1; new_clients = kmem_zalloc(new_size * sizeof (mac_bcast_grp_mcip_t), KM_SLEEP); if (grp->mbg_nclients > 0) { ASSERT(grp->mbg_clients != NULL); bcopy(grp->mbg_clients, new_clients, grp->mbg_nclients * sizeof (mac_bcast_grp_mcip_t)); kmem_free(grp->mbg_clients, grp->mbg_nclients * sizeof (mac_bcast_grp_mcip_t)); } grp->mbg_clients = new_clients; grp->mbg_nclients_alloc = new_size; index = new_size - 1; } ASSERT(index != -1); grp->mbg_clients[index].mgb_client = mcip; grp->mbg_clients[index].mgb_client_ref = 1; grp->mbg_nclients++; /* * Since we're adding to the list of MAC clients using that group, * kick the generation count, which will allow mac_bcast_send() * to detect that condition after re-acquiring the lock. */ grp->mbg_clients_gen++; rw_exit(&mip->mi_rw_lock); return (0); fail: if (prev_mi_addr != NULL) { kmem_free(*prev_mi_addr, sizeof (mac_mcast_addrs_t)); *prev_mi_addr = NULL; (void) mip->mi_multicst(mip->mi_driver, B_FALSE, addr); } if (prev_mci_addr != NULL) { kmem_free(*prev_mci_addr, sizeof (mac_mcast_addrs_t)); *prev_mci_addr = NULL; } return (rc); } /* * Remove the specified MAC client from the group corresponding to * the specific broadcast or multicast address. * * Note: mac_bcast_delete() calls mac_remove_flow() which * will call cv_wait for fe_refcnt to drop to 0. So this function * should not be called from interrupt or STREAMS context. */ void mac_bcast_delete(mac_client_impl_t *mcip, const uint8_t *addr, uint16_t vid) { mac_impl_t *mip = mcip->mci_mip; mac_bcast_grp_t *grp = NULL, **prev; size_t addr_len = mip->mi_type->mt_addr_length; flow_entry_t *flent; uint_t i; mac_mcast_addrs_t *maddr = NULL; mac_mcast_addrs_t **mprev; ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); /* find the broadcast group. The list is protected by the perimeter */ prev = &mip->mi_bcast_grp; for (grp = mip->mi_bcast_grp; grp != NULL; prev = &grp->mbg_next, grp = grp->mbg_next) { if (bcmp(grp->mbg_addr, addr, addr_len) == 0 && grp->mbg_vid == vid) break; } ASSERT(grp != NULL); /* * Remove the MAC client from the list of MAC clients associated * with that broadcast group. * * We mark the mbg_clients[] location corresponding to the removed MAC * client NULL and reuse that location when we add a new MAC client. */ rw_enter(&mip->mi_rw_lock, RW_WRITER); for (i = 0; i < grp->mbg_nclients_alloc; i++) { if (grp->mbg_clients[i].mgb_client == mcip) break; } ASSERT(i < grp->mbg_nclients_alloc); /* * If there are more references to this MAC client, then we let * it remain till it goes to 0. */ if (--grp->mbg_clients[i].mgb_client_ref > 0) goto update_maddr; grp->mbg_clients[i].mgb_client = NULL; grp->mbg_clients[i].mgb_client_ref = 0; /* * Since we're removing from the list of MAC clients using that group, * kick the generation count, which will allow mac_bcast_send() * to detect that condition. */ grp->mbg_clients_gen++; if (--grp->mbg_nclients == 0) { /* * The last MAC client of the group was just removed. * Unlink the current group from the list of groups * defined on top of the underlying NIC. The group * structure will stay around until the last reference * is dropped. */ *prev = grp->mbg_next; } update_maddr: rw_exit(&mip->mi_rw_lock); if (grp->mbg_addrtype == MAC_ADDRTYPE_MULTICAST) { mprev = &mcip->mci_mcast_addrs; for (maddr = mcip->mci_mcast_addrs; maddr != NULL; mprev = &maddr->mma_next, maddr = maddr->mma_next) { if (bcmp(grp->mbg_addr, maddr->mma_addr, mip->mi_type->mt_addr_length) == 0) break; } ASSERT(maddr != NULL); if (--maddr->mma_ref == 0) { *mprev = maddr->mma_next; maddr->mma_next = NULL; kmem_free(maddr, sizeof (mac_mcast_addrs_t)); } mprev = &mip->mi_mcast_addrs; for (maddr = mip->mi_mcast_addrs; maddr != NULL; mprev = &maddr->mma_next, maddr = maddr->mma_next) { if (bcmp(grp->mbg_addr, maddr->mma_addr, mip->mi_type->mt_addr_length) == 0) break; } ASSERT(maddr != NULL); if (--maddr->mma_ref == 0) { (void) mip->mi_multicst(mip->mi_driver, B_FALSE, addr); *mprev = maddr->mma_next; maddr->mma_next = NULL; kmem_free(maddr, sizeof (mac_mcast_addrs_t)); } } /* * If the group itself is being removed, remove the * corresponding flow from the underlying NIC. */ flent = grp->mbg_flow_ent; if (grp->mbg_nclients == 0) { mac_flow_remove(mip->mi_flow_tab, flent, B_FALSE); mac_flow_wait(flent, FLOW_DRIVER_UPCALL); FLOW_FINAL_REFRELE(flent); } } /* * This will be called by a driver, such as aggr, when a port is added/removed * to add/remove the port to/from all the multcast addresses for that aggr. */ void mac_bcast_refresh(mac_impl_t *mip, mac_multicst_t refresh_fn, void *arg, boolean_t add) { mac_mcast_addrs_t *grp, *next; ASSERT(refresh_fn != NULL); ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); /* * Walk the multicast address list and call the refresh function for * each address. */ for (grp = mip->mi_mcast_addrs; grp != NULL; grp = next) { /* * Save the next pointer just in case the refresh * function's action causes the group entry to be * freed. * We won't be adding to this list as part of the * refresh. */ next = grp->mma_next; refresh_fn(arg, add, grp->mma_addr); } } /* * Walk the MAC client's multicast address list and add/remove the addr/vid * ('arg' is 'flent') to all the addresses. */ void mac_client_bcast_refresh(mac_client_impl_t *mcip, mac_multicst_t refresh_fn, void *arg, boolean_t add) { mac_mcast_addrs_t *grp, *next; mac_impl_t *mip = mcip->mci_mip; ASSERT(refresh_fn != NULL); ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); /* * Walk the multicast address list and call the refresh function for * each address. * Broadcast addresses are not added or removed through the multicast * entry points, so don't include them as part of the refresh. */ for (grp = mcip->mci_mcast_addrs; grp != NULL; grp = next) { /* * Save the next pointer just in case the refresh * function's action causes the group entry to be * freed. * We won't be adding to this list as part of the * refresh. */ next = grp->mma_next; refresh_fn(arg, add, grp->mma_addr); } }