1da14cebeSEric Cheng /* 2da14cebeSEric Cheng * CDDL HEADER START 3da14cebeSEric Cheng * 4da14cebeSEric Cheng * The contents of this file are subject to the terms of the 5da14cebeSEric Cheng * Common Development and Distribution License (the "License"). 6da14cebeSEric Cheng * You may not use this file except in compliance with the License. 7da14cebeSEric Cheng * 8da14cebeSEric Cheng * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9da14cebeSEric Cheng * or http://www.opensolaris.org/os/licensing. 10da14cebeSEric Cheng * See the License for the specific language governing permissions 11da14cebeSEric Cheng * and limitations under the License. 12da14cebeSEric Cheng * 13da14cebeSEric Cheng * When distributing Covered Code, include this CDDL HEADER in each 14da14cebeSEric Cheng * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15da14cebeSEric Cheng * If applicable, add the following below this CDDL HEADER, with the 16da14cebeSEric Cheng * fields enclosed by brackets "[]" replaced with your own identifying 17da14cebeSEric Cheng * information: Portions Copyright [yyyy] [name of copyright owner] 18da14cebeSEric Cheng * 19da14cebeSEric Cheng * CDDL HEADER END 20da14cebeSEric Cheng */ 21da14cebeSEric Cheng /* 220dc2366fSVenugopal Iyer * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23da14cebeSEric Cheng * Use is subject to license terms. 24da14cebeSEric Cheng */ 25da14cebeSEric Cheng 26da14cebeSEric Cheng #include <sys/types.h> 27da14cebeSEric Cheng #include <sys/sysmacros.h> 28da14cebeSEric Cheng #include <sys/conf.h> 29da14cebeSEric Cheng #include <sys/cmn_err.h> 30da14cebeSEric Cheng #include <sys/list.h> 31da14cebeSEric Cheng #include <sys/kmem.h> 32da14cebeSEric Cheng #include <sys/stream.h> 33da14cebeSEric Cheng #include <sys/modctl.h> 34da14cebeSEric Cheng #include <sys/ddi.h> 35da14cebeSEric Cheng #include <sys/sunddi.h> 36da14cebeSEric Cheng #include <sys/atomic.h> 37da14cebeSEric Cheng #include <sys/stat.h> 38da14cebeSEric Cheng #include <sys/modhash.h> 39da14cebeSEric Cheng #include <sys/strsubr.h> 40da14cebeSEric Cheng #include <sys/strsun.h> 41da14cebeSEric Cheng #include <sys/sdt.h> 42da14cebeSEric Cheng #include <sys/mac.h> 43da14cebeSEric Cheng #include <sys/mac_impl.h> 44da14cebeSEric Cheng #include <sys/mac_client_impl.h> 45da14cebeSEric Cheng #include <sys/mac_client_priv.h> 46da14cebeSEric Cheng #include <sys/mac_flow_impl.h> 47da14cebeSEric Cheng 48da14cebeSEric Cheng /* 49da14cebeSEric Cheng * Broadcast and multicast traffic must be distributed to the MAC clients 50da14cebeSEric Cheng * that are defined on top of the same MAC. The set of 51da14cebeSEric Cheng * destinations to which a multicast packet must be sent is a subset 52da14cebeSEric Cheng * of all MAC clients defined on top of the MAC. A MAC client can be member 53da14cebeSEric Cheng * of more than one such subset. 54da14cebeSEric Cheng * 55da14cebeSEric Cheng * To accomodate these requirements, we introduce broadcast groups. 56da14cebeSEric Cheng * A broadcast group is associated with a broadcast or multicast 57da14cebeSEric Cheng * address. The members of a broadcast group consist of the MAC clients 58da14cebeSEric Cheng * that should received copies of packets sent to the address 59da14cebeSEric Cheng * associated with the group, and are defined on top of the 60da14cebeSEric Cheng * same MAC. 61da14cebeSEric Cheng * 62da14cebeSEric Cheng * The broadcast groups defined on top of a MAC are chained, 63da14cebeSEric Cheng * hanging off the mac_impl_t. The broadcast group id's are 64da14cebeSEric Cheng * unique globally (tracked by mac_bcast_id). 65da14cebeSEric Cheng */ 66da14cebeSEric Cheng 67da14cebeSEric Cheng /* 68da14cebeSEric Cheng * The same MAC client may be added for different <addr,vid> tuple, 69da14cebeSEric Cheng * we maintain a ref count for the number of times it has been added 70da14cebeSEric Cheng * to account for deleting the MAC client from the group. 71da14cebeSEric Cheng */ 72da14cebeSEric Cheng typedef struct mac_bcast_grp_mcip_s { 73da14cebeSEric Cheng mac_client_impl_t *mgb_client; 74da14cebeSEric Cheng int mgb_client_ref; 75da14cebeSEric Cheng } mac_bcast_grp_mcip_t; 76da14cebeSEric Cheng 77da14cebeSEric Cheng typedef struct mac_bcast_grp_s { /* Protected by */ 78da14cebeSEric Cheng struct mac_bcast_grp_s *mbg_next; /* SL */ 79da14cebeSEric Cheng void *mbg_addr; /* SL */ 80da14cebeSEric Cheng uint16_t mbg_vid; /* SL */ 81da14cebeSEric Cheng mac_impl_t *mbg_mac_impl; /* WO */ 82da14cebeSEric Cheng mac_addrtype_t mbg_addrtype; /* WO */ 83da14cebeSEric Cheng flow_entry_t *mbg_flow_ent; /* WO */ 84da14cebeSEric Cheng mac_bcast_grp_mcip_t *mbg_clients; /* mi_rw_lock */ 85da14cebeSEric Cheng uint_t mbg_nclients; /* mi_rw_lock */ 86da14cebeSEric Cheng uint_t mbg_nclients_alloc; /* SL */ 87da14cebeSEric Cheng uint64_t mbg_clients_gen; /* mi_rw_lock */ 88da14cebeSEric Cheng uint32_t mbg_id; /* atomic */ 89da14cebeSEric Cheng } mac_bcast_grp_t; 90da14cebeSEric Cheng 91da14cebeSEric Cheng static kmem_cache_t *mac_bcast_grp_cache; 92da14cebeSEric Cheng static uint32_t mac_bcast_id = 0; 93da14cebeSEric Cheng 94da14cebeSEric Cheng void 95da14cebeSEric Cheng mac_bcast_init(void) 96da14cebeSEric Cheng { 97da14cebeSEric Cheng mac_bcast_grp_cache = kmem_cache_create("mac_bcast_grp_cache", 98da14cebeSEric Cheng sizeof (mac_bcast_grp_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 99da14cebeSEric Cheng } 100da14cebeSEric Cheng 101da14cebeSEric Cheng void 102da14cebeSEric Cheng mac_bcast_fini(void) 103da14cebeSEric Cheng { 104da14cebeSEric Cheng kmem_cache_destroy(mac_bcast_grp_cache); 105da14cebeSEric Cheng } 106da14cebeSEric Cheng 107da14cebeSEric Cheng mac_impl_t * 108da14cebeSEric Cheng mac_bcast_grp_mip(void *grp) 109da14cebeSEric Cheng { 110da14cebeSEric Cheng mac_bcast_grp_t *bcast_grp = grp; 111da14cebeSEric Cheng 112da14cebeSEric Cheng return (bcast_grp->mbg_mac_impl); 113da14cebeSEric Cheng } 114da14cebeSEric Cheng 115da14cebeSEric Cheng /* 116da14cebeSEric Cheng * Free the specific broadcast group. Invoked when the last reference 117da14cebeSEric Cheng * to the group is released. 118da14cebeSEric Cheng */ 119da14cebeSEric Cheng void 120da14cebeSEric Cheng mac_bcast_grp_free(void *bcast_grp) 121da14cebeSEric Cheng { 122da14cebeSEric Cheng mac_bcast_grp_t *grp = bcast_grp; 123da14cebeSEric Cheng mac_impl_t *mip = grp->mbg_mac_impl; 124da14cebeSEric Cheng 125da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 126da14cebeSEric Cheng 127da14cebeSEric Cheng ASSERT(grp->mbg_addr != NULL); 128da14cebeSEric Cheng kmem_free(grp->mbg_addr, mip->mi_type->mt_addr_length); 129da14cebeSEric Cheng kmem_free(grp->mbg_clients, 130da14cebeSEric Cheng grp->mbg_nclients_alloc * sizeof (mac_bcast_grp_mcip_t)); 131da14cebeSEric Cheng mip->mi_bcast_ngrps--; 132da14cebeSEric Cheng kmem_cache_free(mac_bcast_grp_cache, grp); 133da14cebeSEric Cheng } 134da14cebeSEric Cheng 135da14cebeSEric Cheng /* 136da14cebeSEric Cheng * arg1: broadcast group 137da14cebeSEric Cheng * arg2: sender MAC client if it is being sent by a MAC client, 138da14cebeSEric Cheng * NULL if it was received from the wire. 139da14cebeSEric Cheng */ 140da14cebeSEric Cheng void 141da14cebeSEric Cheng mac_bcast_send(void *arg1, void *arg2, mblk_t *mp_chain, boolean_t is_loopback) 142da14cebeSEric Cheng { 143da14cebeSEric Cheng mac_bcast_grp_t *grp = arg1; 144da14cebeSEric Cheng mac_client_impl_t *src_mcip = arg2, *dst_mcip; 145da14cebeSEric Cheng mac_impl_t *mip = grp->mbg_mac_impl; 146da14cebeSEric Cheng uint64_t gen; 147da14cebeSEric Cheng uint_t i; 148da14cebeSEric Cheng mblk_t *mp_chain1; 149da14cebeSEric Cheng flow_entry_t *flent; 150da14cebeSEric Cheng int err; 151da14cebeSEric Cheng 152da14cebeSEric Cheng rw_enter(&mip->mi_rw_lock, RW_READER); 153da14cebeSEric Cheng 154da14cebeSEric Cheng /* 155da14cebeSEric Cheng * Pass a copy of the mp chain to every MAC client except the sender 156da14cebeSEric Cheng * MAC client, if the packet was not received from the underlying NIC. 157da14cebeSEric Cheng * 158da14cebeSEric Cheng * The broadcast group lock should not be held across calls to 159da14cebeSEric Cheng * the flow's callback function, since the same group could 160da14cebeSEric Cheng * potentially be accessed from the same context. When the lock 161da14cebeSEric Cheng * is reacquired, changes to the broadcast group while the lock 162da14cebeSEric Cheng * was released are caught using a generation counter incremented 163da14cebeSEric Cheng * each time the list of MAC clients associated with the broadcast 164da14cebeSEric Cheng * group is changed. 165da14cebeSEric Cheng */ 166da14cebeSEric Cheng for (i = 0; i < grp->mbg_nclients_alloc; i++) { 167da14cebeSEric Cheng dst_mcip = grp->mbg_clients[i].mgb_client; 168da14cebeSEric Cheng if (dst_mcip == NULL) 169da14cebeSEric Cheng continue; 170da14cebeSEric Cheng flent = dst_mcip->mci_flent; 171da14cebeSEric Cheng if (flent == NULL || dst_mcip == src_mcip) { 172da14cebeSEric Cheng /* 173da14cebeSEric Cheng * Don't send a copy of the packet back to 174da14cebeSEric Cheng * its sender. 175da14cebeSEric Cheng */ 176da14cebeSEric Cheng continue; 177da14cebeSEric Cheng } 178da14cebeSEric Cheng 179da14cebeSEric Cheng /* 180da14cebeSEric Cheng * It is important to hold a reference on the 181da14cebeSEric Cheng * flow_ent here. 182da14cebeSEric Cheng */ 183da14cebeSEric Cheng if ((mp_chain1 = mac_copymsgchain_cksum(mp_chain)) == NULL) 184da14cebeSEric Cheng break; 185da14cebeSEric Cheng /* 186da14cebeSEric Cheng * Fix the checksum for packets originating 187da14cebeSEric Cheng * from the local machine. 188da14cebeSEric Cheng */ 189da14cebeSEric Cheng if ((src_mcip != NULL) && 190da14cebeSEric Cheng (mp_chain1 = mac_fix_cksum(mp_chain1)) == NULL) 191da14cebeSEric Cheng break; 192da14cebeSEric Cheng 193da14cebeSEric Cheng FLOW_TRY_REFHOLD(flent, err); 194da14cebeSEric Cheng if (err != 0) { 195da14cebeSEric Cheng freemsgchain(mp_chain1); 196da14cebeSEric Cheng continue; 197da14cebeSEric Cheng } 198da14cebeSEric Cheng 199da14cebeSEric Cheng gen = grp->mbg_clients_gen; 200da14cebeSEric Cheng 201da14cebeSEric Cheng rw_exit(&mip->mi_rw_lock); 202da14cebeSEric Cheng 203da14cebeSEric Cheng DTRACE_PROBE4(mac__bcast__send__to, mac_client_impl_t *, 204da14cebeSEric Cheng src_mcip, flow_fn_t, dst_mcip->mci_flent->fe_cb_fn, 205da14cebeSEric Cheng void *, dst_mcip->mci_flent->fe_cb_arg1, 206da14cebeSEric Cheng void *, dst_mcip->mci_flent->fe_cb_arg2); 207da14cebeSEric Cheng 208da14cebeSEric Cheng (dst_mcip->mci_flent->fe_cb_fn)(dst_mcip->mci_flent->fe_cb_arg1, 209da14cebeSEric Cheng dst_mcip->mci_flent->fe_cb_arg2, mp_chain1, is_loopback); 210da14cebeSEric Cheng FLOW_REFRELE(flent); 211da14cebeSEric Cheng 212da14cebeSEric Cheng rw_enter(&mip->mi_rw_lock, RW_READER); 213da14cebeSEric Cheng 214da14cebeSEric Cheng /* update stats */ 2150dc2366fSVenugopal Iyer if (grp->mbg_addrtype == MAC_ADDRTYPE_MULTICAST) { 2160dc2366fSVenugopal Iyer MCIP_STAT_UPDATE(dst_mcip, multircv, 1); 2170dc2366fSVenugopal Iyer MCIP_STAT_UPDATE(dst_mcip, multircvbytes, 2180dc2366fSVenugopal Iyer msgdsize(mp_chain)); 2190dc2366fSVenugopal Iyer } else { 2200dc2366fSVenugopal Iyer MCIP_STAT_UPDATE(dst_mcip, brdcstrcv, 1); 2210dc2366fSVenugopal Iyer MCIP_STAT_UPDATE(dst_mcip, brdcstrcvbytes, 2220dc2366fSVenugopal Iyer msgdsize(mp_chain)); 2230dc2366fSVenugopal Iyer } 224da14cebeSEric Cheng 225da14cebeSEric Cheng if (grp->mbg_clients_gen != gen) { 226da14cebeSEric Cheng /* 227da14cebeSEric Cheng * The list of MAC clients associated with the group 228da14cebeSEric Cheng * was changed while the lock was released. 229da14cebeSEric Cheng * Give up on the current packet. 230da14cebeSEric Cheng */ 231da14cebeSEric Cheng rw_exit(&mip->mi_rw_lock); 232da14cebeSEric Cheng freemsgchain(mp_chain); 233da14cebeSEric Cheng return; 234da14cebeSEric Cheng } 235da14cebeSEric Cheng } 236da14cebeSEric Cheng rw_exit(&mip->mi_rw_lock); 237da14cebeSEric Cheng 238da14cebeSEric Cheng if (src_mcip != NULL) { 239da14cebeSEric Cheng /* 240da14cebeSEric Cheng * The packet was sent from one of the MAC clients, 241da14cebeSEric Cheng * so we need to send a copy of the packet to the 242da14cebeSEric Cheng * underlying NIC so that it can be sent on the wire. 243da14cebeSEric Cheng */ 2440dc2366fSVenugopal Iyer MCIP_STAT_UPDATE(src_mcip, multixmt, 1); 2450dc2366fSVenugopal Iyer MCIP_STAT_UPDATE(src_mcip, multixmtbytes, msgdsize(mp_chain)); 2460dc2366fSVenugopal Iyer MCIP_STAT_UPDATE(src_mcip, brdcstxmt, 1); 2470dc2366fSVenugopal Iyer MCIP_STAT_UPDATE(src_mcip, brdcstxmtbytes, msgdsize(mp_chain)); 248da14cebeSEric Cheng 2490dc2366fSVenugopal Iyer MAC_TX(mip, mip->mi_default_tx_ring, mp_chain, src_mcip); 2504eaa4710SRishi Srivatsavai if (mp_chain != NULL) 2514eaa4710SRishi Srivatsavai freemsgchain(mp_chain); 252da14cebeSEric Cheng } else { 253da14cebeSEric Cheng freemsgchain(mp_chain); 254da14cebeSEric Cheng } 255da14cebeSEric Cheng } 256da14cebeSEric Cheng 257da14cebeSEric Cheng /* 258da14cebeSEric Cheng * Add the specified MAC client to the group corresponding to the specified 259da14cebeSEric Cheng * broadcast or multicast address. 260da14cebeSEric Cheng * Return 0 on success, or an errno value on failure. 261da14cebeSEric Cheng */ 262da14cebeSEric Cheng int 263da14cebeSEric Cheng mac_bcast_add(mac_client_impl_t *mcip, const uint8_t *addr, uint16_t vid, 264da14cebeSEric Cheng mac_addrtype_t addrtype) 265da14cebeSEric Cheng { 266da14cebeSEric Cheng mac_impl_t *mip = mcip->mci_mip; 267da14cebeSEric Cheng mac_bcast_grp_t *grp = NULL, **last_grp; 268da14cebeSEric Cheng size_t addr_len = mip->mi_type->mt_addr_length; 269da14cebeSEric Cheng int rc = 0; 270da14cebeSEric Cheng int i, index = -1; 271ae6aa22aSVenugopal Iyer mac_mcast_addrs_t **prev_mi_addr = NULL; 272ae6aa22aSVenugopal Iyer mac_mcast_addrs_t **prev_mci_addr = NULL; 273da14cebeSEric Cheng 274da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 275da14cebeSEric Cheng 276da14cebeSEric Cheng ASSERT(addrtype == MAC_ADDRTYPE_MULTICAST || 277da14cebeSEric Cheng addrtype == MAC_ADDRTYPE_BROADCAST); 278da14cebeSEric Cheng 279ae6aa22aSVenugopal Iyer /* 280ae6aa22aSVenugopal Iyer * Add the MAC client to the list of MAC clients associated 281ae6aa22aSVenugopal Iyer * with the group. 282ae6aa22aSVenugopal Iyer */ 283ae6aa22aSVenugopal Iyer if (addrtype == MAC_ADDRTYPE_MULTICAST) { 284ae6aa22aSVenugopal Iyer mac_mcast_addrs_t *maddr; 285ae6aa22aSVenugopal Iyer 286ae6aa22aSVenugopal Iyer /* 287ae6aa22aSVenugopal Iyer * In case of a driver (say aggr), we need this information 288ae6aa22aSVenugopal Iyer * on a per MAC instance basis. 289ae6aa22aSVenugopal Iyer */ 290ae6aa22aSVenugopal Iyer prev_mi_addr = &mip->mi_mcast_addrs; 291ae6aa22aSVenugopal Iyer for (maddr = *prev_mi_addr; maddr != NULL; 292ae6aa22aSVenugopal Iyer prev_mi_addr = &maddr->mma_next, maddr = maddr->mma_next) { 293ae6aa22aSVenugopal Iyer if (bcmp(maddr->mma_addr, addr, addr_len) == 0) 294ae6aa22aSVenugopal Iyer break; 295ae6aa22aSVenugopal Iyer } 296ae6aa22aSVenugopal Iyer if (maddr == NULL) { 297ae6aa22aSVenugopal Iyer /* 298ae6aa22aSVenugopal Iyer * For multicast addresses, have the underlying MAC 299ae6aa22aSVenugopal Iyer * join the corresponding multicast group. 300ae6aa22aSVenugopal Iyer */ 301ae6aa22aSVenugopal Iyer rc = mip->mi_multicst(mip->mi_driver, B_TRUE, addr); 302ae6aa22aSVenugopal Iyer if (rc != 0) 303ae6aa22aSVenugopal Iyer return (rc); 304ae6aa22aSVenugopal Iyer maddr = kmem_zalloc(sizeof (mac_mcast_addrs_t), 305ae6aa22aSVenugopal Iyer KM_SLEEP); 306ae6aa22aSVenugopal Iyer bcopy(addr, maddr->mma_addr, addr_len); 307ae6aa22aSVenugopal Iyer *prev_mi_addr = maddr; 308ae6aa22aSVenugopal Iyer } else { 309ae6aa22aSVenugopal Iyer prev_mi_addr = NULL; 310ae6aa22aSVenugopal Iyer } 311ae6aa22aSVenugopal Iyer maddr->mma_ref++; 312ae6aa22aSVenugopal Iyer 313ae6aa22aSVenugopal Iyer /* 314ae6aa22aSVenugopal Iyer * We maintain a separate list for each MAC client. Get 315ae6aa22aSVenugopal Iyer * the entry or add, if it is not present. 316ae6aa22aSVenugopal Iyer */ 317ae6aa22aSVenugopal Iyer prev_mci_addr = &mcip->mci_mcast_addrs; 318ae6aa22aSVenugopal Iyer for (maddr = *prev_mci_addr; maddr != NULL; 319ae6aa22aSVenugopal Iyer prev_mci_addr = &maddr->mma_next, maddr = maddr->mma_next) { 320ae6aa22aSVenugopal Iyer if (bcmp(maddr->mma_addr, addr, addr_len) == 0) 321ae6aa22aSVenugopal Iyer break; 322ae6aa22aSVenugopal Iyer } 323ae6aa22aSVenugopal Iyer if (maddr == NULL) { 324ae6aa22aSVenugopal Iyer maddr = kmem_zalloc(sizeof (mac_mcast_addrs_t), 325ae6aa22aSVenugopal Iyer KM_SLEEP); 326ae6aa22aSVenugopal Iyer bcopy(addr, maddr->mma_addr, addr_len); 327ae6aa22aSVenugopal Iyer *prev_mci_addr = maddr; 328ae6aa22aSVenugopal Iyer } else { 329ae6aa22aSVenugopal Iyer prev_mci_addr = NULL; 330ae6aa22aSVenugopal Iyer } 331ae6aa22aSVenugopal Iyer maddr->mma_ref++; 332ae6aa22aSVenugopal Iyer } 333ae6aa22aSVenugopal Iyer 334da14cebeSEric Cheng /* The list is protected by the perimeter */ 335da14cebeSEric Cheng last_grp = &mip->mi_bcast_grp; 336da14cebeSEric Cheng for (grp = *last_grp; grp != NULL; 337da14cebeSEric Cheng last_grp = &grp->mbg_next, grp = grp->mbg_next) { 338da14cebeSEric Cheng if (bcmp(grp->mbg_addr, addr, addr_len) == 0 && 339da14cebeSEric Cheng grp->mbg_vid == vid) 340da14cebeSEric Cheng break; 341da14cebeSEric Cheng } 342da14cebeSEric Cheng 343da14cebeSEric Cheng if (grp == NULL) { 344da14cebeSEric Cheng /* 345da14cebeSEric Cheng * The group does not yet exist, create it. 346da14cebeSEric Cheng */ 347da14cebeSEric Cheng flow_desc_t flow_desc; 348da000602SGirish Moodalbail char flow_name[MAXFLOWNAMELEN]; 349da14cebeSEric Cheng 350da14cebeSEric Cheng grp = kmem_cache_alloc(mac_bcast_grp_cache, KM_SLEEP); 351da14cebeSEric Cheng bzero(grp, sizeof (mac_bcast_grp_t)); 352da14cebeSEric Cheng grp->mbg_next = NULL; 353da14cebeSEric Cheng grp->mbg_mac_impl = mip; 354da14cebeSEric Cheng 355da14cebeSEric Cheng DTRACE_PROBE1(mac__bcast__add__new__group, mac_bcast_grp_t *, 356da14cebeSEric Cheng grp); 357da14cebeSEric Cheng 358da14cebeSEric Cheng grp->mbg_addr = kmem_zalloc(addr_len, KM_SLEEP); 359da14cebeSEric Cheng bcopy(addr, grp->mbg_addr, addr_len); 360da14cebeSEric Cheng grp->mbg_addrtype = addrtype; 361da14cebeSEric Cheng grp->mbg_vid = vid; 362da14cebeSEric Cheng 363da14cebeSEric Cheng /* 364da14cebeSEric Cheng * Add a new flow to the underlying MAC. 365da14cebeSEric Cheng */ 366da14cebeSEric Cheng bzero(&flow_desc, sizeof (flow_desc)); 367da14cebeSEric Cheng bcopy(addr, &flow_desc.fd_dst_mac, addr_len); 368da14cebeSEric Cheng flow_desc.fd_mac_len = (uint32_t)addr_len; 369da14cebeSEric Cheng 370da14cebeSEric Cheng flow_desc.fd_mask = FLOW_LINK_DST; 371da14cebeSEric Cheng if (vid != 0) { 372da14cebeSEric Cheng flow_desc.fd_vid = vid; 373da14cebeSEric Cheng flow_desc.fd_mask |= FLOW_LINK_VID; 374da14cebeSEric Cheng } 375da14cebeSEric Cheng 376*1a5e258fSJosef 'Jeff' Sipek grp->mbg_id = atomic_inc_32_nv(&mac_bcast_id); 377da14cebeSEric Cheng (void) sprintf(flow_name, 378da14cebeSEric Cheng "mac/%s/mcast%d", mip->mi_name, grp->mbg_id); 379da14cebeSEric Cheng 380da14cebeSEric Cheng rc = mac_flow_create(&flow_desc, NULL, flow_name, 381da14cebeSEric Cheng grp, FLOW_MCAST, &grp->mbg_flow_ent); 382da14cebeSEric Cheng if (rc != 0) { 383da14cebeSEric Cheng kmem_free(grp->mbg_addr, addr_len); 384da14cebeSEric Cheng kmem_cache_free(mac_bcast_grp_cache, grp); 385ae6aa22aSVenugopal Iyer goto fail; 386da14cebeSEric Cheng } 387da14cebeSEric Cheng grp->mbg_flow_ent->fe_mbg = grp; 388da14cebeSEric Cheng mip->mi_bcast_ngrps++; 389da14cebeSEric Cheng 390da14cebeSEric Cheng /* 391da14cebeSEric Cheng * Initial creation reference on the flow. This is released 392da14cebeSEric Cheng * in the corresponding delete action i_mac_bcast_delete() 393da14cebeSEric Cheng */ 394da14cebeSEric Cheng FLOW_REFHOLD(grp->mbg_flow_ent); 395da14cebeSEric Cheng 396da14cebeSEric Cheng /* 397da14cebeSEric Cheng * When the multicast and broadcast packet is received 398da14cebeSEric Cheng * by the underlying NIC, mac_rx_classify() will invoke 399da14cebeSEric Cheng * mac_bcast_send() with arg2=NULL, which will cause 400da14cebeSEric Cheng * mac_bcast_send() to send a copy of the packet(s) 401da14cebeSEric Cheng * to every MAC client opened on top of the underlying MAC. 402da14cebeSEric Cheng * 403da14cebeSEric Cheng * When the mac_bcast_send() function is invoked from 404da14cebeSEric Cheng * the transmit path of a MAC client, it will specify the 405da14cebeSEric Cheng * transmitting MAC client as the arg2 value, which will 406da14cebeSEric Cheng * allow mac_bcast_send() to skip that MAC client and not 407da14cebeSEric Cheng * send it a copy of the packet. 408da14cebeSEric Cheng * 409da14cebeSEric Cheng * We program the classifier to dispatch matching broadcast 410da14cebeSEric Cheng * packets to mac_bcast_send(). 411da14cebeSEric Cheng */ 412da14cebeSEric Cheng 413da14cebeSEric Cheng grp->mbg_flow_ent->fe_cb_fn = mac_bcast_send; 414da14cebeSEric Cheng grp->mbg_flow_ent->fe_cb_arg1 = grp; 415da14cebeSEric Cheng grp->mbg_flow_ent->fe_cb_arg2 = NULL; 416da14cebeSEric Cheng 417da14cebeSEric Cheng rc = mac_flow_add(mip->mi_flow_tab, grp->mbg_flow_ent); 418da14cebeSEric Cheng if (rc != 0) { 419da14cebeSEric Cheng FLOW_FINAL_REFRELE(grp->mbg_flow_ent); 420ae6aa22aSVenugopal Iyer goto fail; 421da14cebeSEric Cheng } 422da14cebeSEric Cheng 423da14cebeSEric Cheng *last_grp = grp; 424da14cebeSEric Cheng } 425da14cebeSEric Cheng 426da14cebeSEric Cheng ASSERT(grp->mbg_addrtype == addrtype); 427da14cebeSEric Cheng 428da14cebeSEric Cheng /* 429da14cebeSEric Cheng * Add the MAC client to the list of MAC clients associated 430da14cebeSEric Cheng * with the group. 431da14cebeSEric Cheng */ 432da14cebeSEric Cheng rw_enter(&mip->mi_rw_lock, RW_WRITER); 433da14cebeSEric Cheng for (i = 0; i < grp->mbg_nclients_alloc; i++) { 434da14cebeSEric Cheng /* 435da14cebeSEric Cheng * The MAC client was already added, say when we have 436da14cebeSEric Cheng * different unicast addresses with the same vid. 437da14cebeSEric Cheng * Just increment the ref and we are done. 438da14cebeSEric Cheng */ 439da14cebeSEric Cheng if (grp->mbg_clients[i].mgb_client == mcip) { 440da14cebeSEric Cheng grp->mbg_clients[i].mgb_client_ref++; 441ae6aa22aSVenugopal Iyer rw_exit(&mip->mi_rw_lock); 442ae6aa22aSVenugopal Iyer return (0); 443da14cebeSEric Cheng } else if (grp->mbg_clients[i].mgb_client == NULL && 444da14cebeSEric Cheng index == -1) { 445da14cebeSEric Cheng index = i; 446da14cebeSEric Cheng } 447da14cebeSEric Cheng } 448da14cebeSEric Cheng if (grp->mbg_nclients_alloc == grp->mbg_nclients) { 449da14cebeSEric Cheng mac_bcast_grp_mcip_t *new_clients; 450da14cebeSEric Cheng uint_t new_size = grp->mbg_nclients+1; 451da14cebeSEric Cheng 452da14cebeSEric Cheng new_clients = kmem_zalloc(new_size * 453da14cebeSEric Cheng sizeof (mac_bcast_grp_mcip_t), KM_SLEEP); 454da14cebeSEric Cheng 455da14cebeSEric Cheng if (grp->mbg_nclients > 0) { 456da14cebeSEric Cheng ASSERT(grp->mbg_clients != NULL); 457da14cebeSEric Cheng bcopy(grp->mbg_clients, new_clients, grp->mbg_nclients * 458da14cebeSEric Cheng sizeof (mac_bcast_grp_mcip_t)); 459da14cebeSEric Cheng kmem_free(grp->mbg_clients, grp->mbg_nclients * 460da14cebeSEric Cheng sizeof (mac_bcast_grp_mcip_t)); 461da14cebeSEric Cheng } 462da14cebeSEric Cheng 463da14cebeSEric Cheng grp->mbg_clients = new_clients; 464da14cebeSEric Cheng grp->mbg_nclients_alloc = new_size; 465da14cebeSEric Cheng index = new_size - 1; 466da14cebeSEric Cheng } 467da14cebeSEric Cheng 468da14cebeSEric Cheng ASSERT(index != -1); 469da14cebeSEric Cheng grp->mbg_clients[index].mgb_client = mcip; 470da14cebeSEric Cheng grp->mbg_clients[index].mgb_client_ref = 1; 471da14cebeSEric Cheng grp->mbg_nclients++; 472da14cebeSEric Cheng /* 473da14cebeSEric Cheng * Since we're adding to the list of MAC clients using that group, 474da14cebeSEric Cheng * kick the generation count, which will allow mac_bcast_send() 475da14cebeSEric Cheng * to detect that condition after re-acquiring the lock. 476da14cebeSEric Cheng */ 477da14cebeSEric Cheng grp->mbg_clients_gen++; 478da14cebeSEric Cheng rw_exit(&mip->mi_rw_lock); 479da14cebeSEric Cheng return (0); 480ae6aa22aSVenugopal Iyer 481ae6aa22aSVenugopal Iyer fail: 482ae6aa22aSVenugopal Iyer if (prev_mi_addr != NULL) { 483ae6aa22aSVenugopal Iyer kmem_free(*prev_mi_addr, sizeof (mac_mcast_addrs_t)); 484ae6aa22aSVenugopal Iyer *prev_mi_addr = NULL; 485ae6aa22aSVenugopal Iyer (void) mip->mi_multicst(mip->mi_driver, B_FALSE, addr); 486ae6aa22aSVenugopal Iyer } 487ae6aa22aSVenugopal Iyer if (prev_mci_addr != NULL) { 488ae6aa22aSVenugopal Iyer kmem_free(*prev_mci_addr, sizeof (mac_mcast_addrs_t)); 489ae6aa22aSVenugopal Iyer *prev_mci_addr = NULL; 490ae6aa22aSVenugopal Iyer } 491ae6aa22aSVenugopal Iyer return (rc); 492da14cebeSEric Cheng } 493da14cebeSEric Cheng 494da14cebeSEric Cheng /* 495da14cebeSEric Cheng * Remove the specified MAC client from the group corresponding to 496da14cebeSEric Cheng * the specific broadcast or multicast address. 497da14cebeSEric Cheng * 498da14cebeSEric Cheng * Note: mac_bcast_delete() calls mac_remove_flow() which 499da14cebeSEric Cheng * will call cv_wait for fe_refcnt to drop to 0. So this function 500da14cebeSEric Cheng * should not be called from interrupt or STREAMS context. 501da14cebeSEric Cheng */ 502da14cebeSEric Cheng void 503da14cebeSEric Cheng mac_bcast_delete(mac_client_impl_t *mcip, const uint8_t *addr, uint16_t vid) 504da14cebeSEric Cheng { 505da14cebeSEric Cheng mac_impl_t *mip = mcip->mci_mip; 506da14cebeSEric Cheng mac_bcast_grp_t *grp = NULL, **prev; 507da14cebeSEric Cheng size_t addr_len = mip->mi_type->mt_addr_length; 508da14cebeSEric Cheng flow_entry_t *flent; 509da14cebeSEric Cheng uint_t i; 510da14cebeSEric Cheng mac_mcast_addrs_t *maddr = NULL; 511da14cebeSEric Cheng mac_mcast_addrs_t **mprev; 512da14cebeSEric Cheng 513da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 514da14cebeSEric Cheng 515da14cebeSEric Cheng /* find the broadcast group. The list is protected by the perimeter */ 516da14cebeSEric Cheng prev = &mip->mi_bcast_grp; 517da14cebeSEric Cheng for (grp = mip->mi_bcast_grp; grp != NULL; prev = &grp->mbg_next, 518da14cebeSEric Cheng grp = grp->mbg_next) { 519da14cebeSEric Cheng if (bcmp(grp->mbg_addr, addr, addr_len) == 0 && 520da14cebeSEric Cheng grp->mbg_vid == vid) 521da14cebeSEric Cheng break; 522da14cebeSEric Cheng } 523da14cebeSEric Cheng ASSERT(grp != NULL); 524da14cebeSEric Cheng 525da14cebeSEric Cheng /* 526da14cebeSEric Cheng * Remove the MAC client from the list of MAC clients associated 527da14cebeSEric Cheng * with that broadcast group. 528da14cebeSEric Cheng * 529da14cebeSEric Cheng * We mark the mbg_clients[] location corresponding to the removed MAC 530da14cebeSEric Cheng * client NULL and reuse that location when we add a new MAC client. 531da14cebeSEric Cheng */ 532da14cebeSEric Cheng 533da14cebeSEric Cheng rw_enter(&mip->mi_rw_lock, RW_WRITER); 534da14cebeSEric Cheng 535da14cebeSEric Cheng for (i = 0; i < grp->mbg_nclients_alloc; i++) { 536da14cebeSEric Cheng if (grp->mbg_clients[i].mgb_client == mcip) 537da14cebeSEric Cheng break; 538da14cebeSEric Cheng } 539da14cebeSEric Cheng 540da14cebeSEric Cheng ASSERT(i < grp->mbg_nclients_alloc); 541da14cebeSEric Cheng /* 542da14cebeSEric Cheng * If there are more references to this MAC client, then we let 543da14cebeSEric Cheng * it remain till it goes to 0. 544da14cebeSEric Cheng */ 545da14cebeSEric Cheng if (--grp->mbg_clients[i].mgb_client_ref > 0) 546da14cebeSEric Cheng goto update_maddr; 547da14cebeSEric Cheng 548da14cebeSEric Cheng grp->mbg_clients[i].mgb_client = NULL; 549da14cebeSEric Cheng grp->mbg_clients[i].mgb_client_ref = 0; 550da14cebeSEric Cheng 551da14cebeSEric Cheng /* 552da14cebeSEric Cheng * Since we're removing from the list of MAC clients using that group, 553da14cebeSEric Cheng * kick the generation count, which will allow mac_bcast_send() 554da14cebeSEric Cheng * to detect that condition. 555da14cebeSEric Cheng */ 556da14cebeSEric Cheng grp->mbg_clients_gen++; 557da14cebeSEric Cheng 558da14cebeSEric Cheng if (--grp->mbg_nclients == 0) { 559da14cebeSEric Cheng /* 560da14cebeSEric Cheng * The last MAC client of the group was just removed. 561da14cebeSEric Cheng * Unlink the current group from the list of groups 562da14cebeSEric Cheng * defined on top of the underlying NIC. The group 563da14cebeSEric Cheng * structure will stay around until the last reference 564da14cebeSEric Cheng * is dropped. 565da14cebeSEric Cheng */ 566da14cebeSEric Cheng *prev = grp->mbg_next; 567da14cebeSEric Cheng } 568da14cebeSEric Cheng update_maddr: 569ae6aa22aSVenugopal Iyer rw_exit(&mip->mi_rw_lock); 570ae6aa22aSVenugopal Iyer 571da14cebeSEric Cheng if (grp->mbg_addrtype == MAC_ADDRTYPE_MULTICAST) { 572da14cebeSEric Cheng mprev = &mcip->mci_mcast_addrs; 573da14cebeSEric Cheng for (maddr = mcip->mci_mcast_addrs; maddr != NULL; 574da14cebeSEric Cheng mprev = &maddr->mma_next, maddr = maddr->mma_next) { 575da14cebeSEric Cheng if (bcmp(grp->mbg_addr, maddr->mma_addr, 576da14cebeSEric Cheng mip->mi_type->mt_addr_length) == 0) 577da14cebeSEric Cheng break; 578da14cebeSEric Cheng } 579da14cebeSEric Cheng ASSERT(maddr != NULL); 580da14cebeSEric Cheng if (--maddr->mma_ref == 0) { 581da14cebeSEric Cheng *mprev = maddr->mma_next; 582da14cebeSEric Cheng maddr->mma_next = NULL; 583da14cebeSEric Cheng kmem_free(maddr, sizeof (mac_mcast_addrs_t)); 584da14cebeSEric Cheng } 585da14cebeSEric Cheng 586da14cebeSEric Cheng mprev = &mip->mi_mcast_addrs; 587da14cebeSEric Cheng for (maddr = mip->mi_mcast_addrs; maddr != NULL; 588da14cebeSEric Cheng mprev = &maddr->mma_next, maddr = maddr->mma_next) { 589da14cebeSEric Cheng if (bcmp(grp->mbg_addr, maddr->mma_addr, 590da14cebeSEric Cheng mip->mi_type->mt_addr_length) == 0) 591da14cebeSEric Cheng break; 592da14cebeSEric Cheng } 593da14cebeSEric Cheng ASSERT(maddr != NULL); 594da14cebeSEric Cheng if (--maddr->mma_ref == 0) { 595ae6aa22aSVenugopal Iyer (void) mip->mi_multicst(mip->mi_driver, B_FALSE, addr); 596da14cebeSEric Cheng *mprev = maddr->mma_next; 597da14cebeSEric Cheng maddr->mma_next = NULL; 598da14cebeSEric Cheng kmem_free(maddr, sizeof (mac_mcast_addrs_t)); 599da14cebeSEric Cheng } 600da14cebeSEric Cheng } 601da14cebeSEric Cheng 602da14cebeSEric Cheng /* 603da14cebeSEric Cheng * If the group itself is being removed, remove the 604da14cebeSEric Cheng * corresponding flow from the underlying NIC. 605da14cebeSEric Cheng */ 606da14cebeSEric Cheng flent = grp->mbg_flow_ent; 607da14cebeSEric Cheng if (grp->mbg_nclients == 0) { 608da14cebeSEric Cheng mac_flow_remove(mip->mi_flow_tab, flent, B_FALSE); 609da14cebeSEric Cheng mac_flow_wait(flent, FLOW_DRIVER_UPCALL); 610da14cebeSEric Cheng FLOW_FINAL_REFRELE(flent); 611da14cebeSEric Cheng } 612da14cebeSEric Cheng } 613da14cebeSEric Cheng 614da14cebeSEric Cheng /* 615da14cebeSEric Cheng * This will be called by a driver, such as aggr, when a port is added/removed 616da14cebeSEric Cheng * to add/remove the port to/from all the multcast addresses for that aggr. 617da14cebeSEric Cheng */ 618da14cebeSEric Cheng void 619da14cebeSEric Cheng mac_bcast_refresh(mac_impl_t *mip, mac_multicst_t refresh_fn, void *arg, 620da14cebeSEric Cheng boolean_t add) 621da14cebeSEric Cheng { 622da14cebeSEric Cheng mac_mcast_addrs_t *grp, *next; 623da14cebeSEric Cheng 624da14cebeSEric Cheng ASSERT(refresh_fn != NULL); 625da14cebeSEric Cheng 626da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 627da14cebeSEric Cheng 628da14cebeSEric Cheng /* 629da14cebeSEric Cheng * Walk the multicast address list and call the refresh function for 630da14cebeSEric Cheng * each address. 631da14cebeSEric Cheng */ 632da14cebeSEric Cheng 633da14cebeSEric Cheng for (grp = mip->mi_mcast_addrs; grp != NULL; grp = next) { 634da14cebeSEric Cheng /* 635da14cebeSEric Cheng * Save the next pointer just in case the refresh 636da14cebeSEric Cheng * function's action causes the group entry to be 637da14cebeSEric Cheng * freed. 638da14cebeSEric Cheng * We won't be adding to this list as part of the 639da14cebeSEric Cheng * refresh. 640da14cebeSEric Cheng */ 641da14cebeSEric Cheng next = grp->mma_next; 642da14cebeSEric Cheng refresh_fn(arg, add, grp->mma_addr); 643da14cebeSEric Cheng } 644da14cebeSEric Cheng } 645da14cebeSEric Cheng 646da14cebeSEric Cheng /* 647da14cebeSEric Cheng * Walk the MAC client's multicast address list and add/remove the addr/vid 648da14cebeSEric Cheng * ('arg' is 'flent') to all the addresses. 649da14cebeSEric Cheng */ 650da14cebeSEric Cheng void 651da14cebeSEric Cheng mac_client_bcast_refresh(mac_client_impl_t *mcip, mac_multicst_t refresh_fn, 652da14cebeSEric Cheng void *arg, boolean_t add) 653da14cebeSEric Cheng { 654da14cebeSEric Cheng mac_mcast_addrs_t *grp, *next; 655da14cebeSEric Cheng mac_impl_t *mip = mcip->mci_mip; 656da14cebeSEric Cheng 657da14cebeSEric Cheng ASSERT(refresh_fn != NULL); 658da14cebeSEric Cheng 659da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD((mac_handle_t)mip)); 660da14cebeSEric Cheng /* 661da14cebeSEric Cheng * Walk the multicast address list and call the refresh function for 662da14cebeSEric Cheng * each address. 663da14cebeSEric Cheng * Broadcast addresses are not added or removed through the multicast 664da14cebeSEric Cheng * entry points, so don't include them as part of the refresh. 665da14cebeSEric Cheng */ 666da14cebeSEric Cheng for (grp = mcip->mci_mcast_addrs; grp != NULL; grp = next) { 667da14cebeSEric Cheng /* 668da14cebeSEric Cheng * Save the next pointer just in case the refresh 669da14cebeSEric Cheng * function's action causes the group entry to be 670da14cebeSEric Cheng * freed. 671da14cebeSEric Cheng * We won't be adding to this list as part of the 672da14cebeSEric Cheng * refresh. 673da14cebeSEric Cheng */ 674da14cebeSEric Cheng next = grp->mma_next; 675da14cebeSEric Cheng refresh_fn(arg, add, grp->mma_addr); 676da14cebeSEric Cheng } 677da14cebeSEric Cheng } 678