xref: /titanic_44/usr/src/uts/common/io/mac/mac_bcast.c (revision 1a5e258f5471356ca102c7176637cdce45bac147)
1da14cebeSEric Cheng /*
2da14cebeSEric Cheng  * CDDL HEADER START
3da14cebeSEric Cheng  *
4da14cebeSEric Cheng  * The contents of this file are subject to the terms of the
5da14cebeSEric Cheng  * Common Development and Distribution License (the "License").
6da14cebeSEric Cheng  * You may not use this file except in compliance with the License.
7da14cebeSEric Cheng  *
8da14cebeSEric Cheng  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9da14cebeSEric Cheng  * or http://www.opensolaris.org/os/licensing.
10da14cebeSEric Cheng  * See the License for the specific language governing permissions
11da14cebeSEric Cheng  * and limitations under the License.
12da14cebeSEric Cheng  *
13da14cebeSEric Cheng  * When distributing Covered Code, include this CDDL HEADER in each
14da14cebeSEric Cheng  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15da14cebeSEric Cheng  * If applicable, add the following below this CDDL HEADER, with the
16da14cebeSEric Cheng  * fields enclosed by brackets "[]" replaced with your own identifying
17da14cebeSEric Cheng  * information: Portions Copyright [yyyy] [name of copyright owner]
18da14cebeSEric Cheng  *
19da14cebeSEric Cheng  * CDDL HEADER END
20da14cebeSEric Cheng  */
21da14cebeSEric Cheng /*
220dc2366fSVenugopal Iyer  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23da14cebeSEric Cheng  * Use is subject to license terms.
24da14cebeSEric Cheng  */
25da14cebeSEric Cheng 
26da14cebeSEric Cheng #include <sys/types.h>
27da14cebeSEric Cheng #include <sys/sysmacros.h>
28da14cebeSEric Cheng #include <sys/conf.h>
29da14cebeSEric Cheng #include <sys/cmn_err.h>
30da14cebeSEric Cheng #include <sys/list.h>
31da14cebeSEric Cheng #include <sys/kmem.h>
32da14cebeSEric Cheng #include <sys/stream.h>
33da14cebeSEric Cheng #include <sys/modctl.h>
34da14cebeSEric Cheng #include <sys/ddi.h>
35da14cebeSEric Cheng #include <sys/sunddi.h>
36da14cebeSEric Cheng #include <sys/atomic.h>
37da14cebeSEric Cheng #include <sys/stat.h>
38da14cebeSEric Cheng #include <sys/modhash.h>
39da14cebeSEric Cheng #include <sys/strsubr.h>
40da14cebeSEric Cheng #include <sys/strsun.h>
41da14cebeSEric Cheng #include <sys/sdt.h>
42da14cebeSEric Cheng #include <sys/mac.h>
43da14cebeSEric Cheng #include <sys/mac_impl.h>
44da14cebeSEric Cheng #include <sys/mac_client_impl.h>
45da14cebeSEric Cheng #include <sys/mac_client_priv.h>
46da14cebeSEric Cheng #include <sys/mac_flow_impl.h>
47da14cebeSEric Cheng 
/*
 * Broadcast and multicast traffic must be distributed to the MAC clients
 * that are defined on top of the same MAC. The set of
 * destinations to which a multicast packet must be sent is a subset
 * of all MAC clients defined on top of the MAC. A MAC client can be a
 * member of more than one such subset.
 *
 * To accommodate these requirements, we introduce broadcast groups.
 * A broadcast group is associated with a broadcast or multicast
 * address. The members of a broadcast group consist of the MAC clients
 * that should receive copies of packets sent to the address
 * associated with the group, and are defined on top of the
 * same MAC.
 *
 * The broadcast groups defined on top of a MAC are chained,
 * hanging off the mac_impl_t. The broadcast group ids are
 * unique globally (tracked by mac_bcast_id).
 */
66da14cebeSEric Cheng 
67da14cebeSEric Cheng /*
68da14cebeSEric Cheng  * The same MAC client may be added for different <addr,vid> tuple,
69da14cebeSEric Cheng  * we maintain a ref count for the number of times it has been added
70da14cebeSEric Cheng  * to account for deleting the MAC client from the group.
71da14cebeSEric Cheng  */
72da14cebeSEric Cheng typedef struct mac_bcast_grp_mcip_s {
73da14cebeSEric Cheng 	mac_client_impl_t	*mgb_client;
74da14cebeSEric Cheng 	int			mgb_client_ref;
75da14cebeSEric Cheng } mac_bcast_grp_mcip_t;
76da14cebeSEric Cheng 
77da14cebeSEric Cheng typedef struct mac_bcast_grp_s {			/* Protected by */
78da14cebeSEric Cheng 	struct mac_bcast_grp_s	*mbg_next;		/* SL */
79da14cebeSEric Cheng 	void			*mbg_addr;		/* SL */
80da14cebeSEric Cheng 	uint16_t		mbg_vid;		/* SL */
81da14cebeSEric Cheng 	mac_impl_t		*mbg_mac_impl;		/* WO */
82da14cebeSEric Cheng 	mac_addrtype_t		mbg_addrtype;		/* WO */
83da14cebeSEric Cheng 	flow_entry_t		*mbg_flow_ent;		/* WO */
84da14cebeSEric Cheng 	mac_bcast_grp_mcip_t	*mbg_clients;		/* mi_rw_lock */
85da14cebeSEric Cheng 	uint_t			mbg_nclients;		/* mi_rw_lock */
86da14cebeSEric Cheng 	uint_t			mbg_nclients_alloc;	/* SL */
87da14cebeSEric Cheng 	uint64_t		mbg_clients_gen;	/* mi_rw_lock */
88da14cebeSEric Cheng 	uint32_t		mbg_id;			/* atomic */
89da14cebeSEric Cheng } mac_bcast_grp_t;
90da14cebeSEric Cheng 
91da14cebeSEric Cheng static kmem_cache_t *mac_bcast_grp_cache;
92da14cebeSEric Cheng static uint32_t mac_bcast_id = 0;
93da14cebeSEric Cheng 
94da14cebeSEric Cheng void
mac_bcast_init(void)95da14cebeSEric Cheng mac_bcast_init(void)
96da14cebeSEric Cheng {
97da14cebeSEric Cheng 	mac_bcast_grp_cache = kmem_cache_create("mac_bcast_grp_cache",
98da14cebeSEric Cheng 	    sizeof (mac_bcast_grp_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
99da14cebeSEric Cheng }
100da14cebeSEric Cheng 
101da14cebeSEric Cheng void
mac_bcast_fini(void)102da14cebeSEric Cheng mac_bcast_fini(void)
103da14cebeSEric Cheng {
104da14cebeSEric Cheng 	kmem_cache_destroy(mac_bcast_grp_cache);
105da14cebeSEric Cheng }
106da14cebeSEric Cheng 
107da14cebeSEric Cheng mac_impl_t *
mac_bcast_grp_mip(void * grp)108da14cebeSEric Cheng mac_bcast_grp_mip(void *grp)
109da14cebeSEric Cheng {
110da14cebeSEric Cheng 	mac_bcast_grp_t *bcast_grp = grp;
111da14cebeSEric Cheng 
112da14cebeSEric Cheng 	return (bcast_grp->mbg_mac_impl);
113da14cebeSEric Cheng }
114da14cebeSEric Cheng 
115da14cebeSEric Cheng /*
116da14cebeSEric Cheng  * Free the specific broadcast group. Invoked when the last reference
117da14cebeSEric Cheng  * to the group is released.
118da14cebeSEric Cheng  */
119da14cebeSEric Cheng void
mac_bcast_grp_free(void * bcast_grp)120da14cebeSEric Cheng mac_bcast_grp_free(void *bcast_grp)
121da14cebeSEric Cheng {
122da14cebeSEric Cheng 	mac_bcast_grp_t	*grp = bcast_grp;
123da14cebeSEric Cheng 	mac_impl_t *mip = grp->mbg_mac_impl;
124da14cebeSEric Cheng 
125da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
126da14cebeSEric Cheng 
127da14cebeSEric Cheng 	ASSERT(grp->mbg_addr != NULL);
128da14cebeSEric Cheng 	kmem_free(grp->mbg_addr, mip->mi_type->mt_addr_length);
129da14cebeSEric Cheng 	kmem_free(grp->mbg_clients,
130da14cebeSEric Cheng 	    grp->mbg_nclients_alloc * sizeof (mac_bcast_grp_mcip_t));
131da14cebeSEric Cheng 	mip->mi_bcast_ngrps--;
132da14cebeSEric Cheng 	kmem_cache_free(mac_bcast_grp_cache, grp);
133da14cebeSEric Cheng }
134da14cebeSEric Cheng 
135da14cebeSEric Cheng /*
136da14cebeSEric Cheng  * arg1: broadcast group
137da14cebeSEric Cheng  * arg2: sender MAC client if it is being sent by a MAC client,
138da14cebeSEric Cheng  * NULL if it was received from the wire.
139da14cebeSEric Cheng  */
140da14cebeSEric Cheng void
mac_bcast_send(void * arg1,void * arg2,mblk_t * mp_chain,boolean_t is_loopback)141da14cebeSEric Cheng mac_bcast_send(void *arg1, void *arg2, mblk_t *mp_chain, boolean_t is_loopback)
142da14cebeSEric Cheng {
143da14cebeSEric Cheng 	mac_bcast_grp_t *grp = arg1;
144da14cebeSEric Cheng 	mac_client_impl_t *src_mcip = arg2, *dst_mcip;
145da14cebeSEric Cheng 	mac_impl_t *mip = grp->mbg_mac_impl;
146da14cebeSEric Cheng 	uint64_t gen;
147da14cebeSEric Cheng 	uint_t i;
148da14cebeSEric Cheng 	mblk_t *mp_chain1;
149da14cebeSEric Cheng 	flow_entry_t	*flent;
150da14cebeSEric Cheng 	int err;
151da14cebeSEric Cheng 
152da14cebeSEric Cheng 	rw_enter(&mip->mi_rw_lock, RW_READER);
153da14cebeSEric Cheng 
154da14cebeSEric Cheng 	/*
155da14cebeSEric Cheng 	 * Pass a copy of the mp chain to every MAC client except the sender
156da14cebeSEric Cheng 	 * MAC client, if the packet was not received from the underlying NIC.
157da14cebeSEric Cheng 	 *
158da14cebeSEric Cheng 	 * The broadcast group lock should not be held across calls to
159da14cebeSEric Cheng 	 * the flow's callback function, since the same group could
160da14cebeSEric Cheng 	 * potentially be accessed from the same context. When the lock
161da14cebeSEric Cheng 	 * is reacquired, changes to the broadcast group while the lock
162da14cebeSEric Cheng 	 * was released are caught using a generation counter incremented
163da14cebeSEric Cheng 	 * each time the list of MAC clients associated with the broadcast
164da14cebeSEric Cheng 	 * group is changed.
165da14cebeSEric Cheng 	 */
166da14cebeSEric Cheng 	for (i = 0; i < grp->mbg_nclients_alloc; i++) {
167da14cebeSEric Cheng 		dst_mcip = grp->mbg_clients[i].mgb_client;
168da14cebeSEric Cheng 		if (dst_mcip == NULL)
169da14cebeSEric Cheng 			continue;
170da14cebeSEric Cheng 		flent = dst_mcip->mci_flent;
171da14cebeSEric Cheng 		if (flent == NULL || dst_mcip == src_mcip) {
172da14cebeSEric Cheng 			/*
173da14cebeSEric Cheng 			 * Don't send a copy of the packet back to
174da14cebeSEric Cheng 			 * its sender.
175da14cebeSEric Cheng 			 */
176da14cebeSEric Cheng 			continue;
177da14cebeSEric Cheng 		}
178da14cebeSEric Cheng 
179da14cebeSEric Cheng 		/*
180da14cebeSEric Cheng 		 * It is important to hold a reference on the
181da14cebeSEric Cheng 		 * flow_ent here.
182da14cebeSEric Cheng 		 */
183da14cebeSEric Cheng 		if ((mp_chain1 = mac_copymsgchain_cksum(mp_chain)) == NULL)
184da14cebeSEric Cheng 			break;
185da14cebeSEric Cheng 		/*
186da14cebeSEric Cheng 		 * Fix the checksum for packets originating
187da14cebeSEric Cheng 		 * from the local machine.
188da14cebeSEric Cheng 		 */
189da14cebeSEric Cheng 		if ((src_mcip != NULL) &&
190da14cebeSEric Cheng 		    (mp_chain1 = mac_fix_cksum(mp_chain1)) == NULL)
191da14cebeSEric Cheng 			break;
192da14cebeSEric Cheng 
193da14cebeSEric Cheng 		FLOW_TRY_REFHOLD(flent, err);
194da14cebeSEric Cheng 		if (err != 0) {
195da14cebeSEric Cheng 			freemsgchain(mp_chain1);
196da14cebeSEric Cheng 			continue;
197da14cebeSEric Cheng 		}
198da14cebeSEric Cheng 
199da14cebeSEric Cheng 		gen = grp->mbg_clients_gen;
200da14cebeSEric Cheng 
201da14cebeSEric Cheng 		rw_exit(&mip->mi_rw_lock);
202da14cebeSEric Cheng 
203da14cebeSEric Cheng 		DTRACE_PROBE4(mac__bcast__send__to, mac_client_impl_t *,
204da14cebeSEric Cheng 		    src_mcip, flow_fn_t, dst_mcip->mci_flent->fe_cb_fn,
205da14cebeSEric Cheng 		    void *, dst_mcip->mci_flent->fe_cb_arg1,
206da14cebeSEric Cheng 		    void *, dst_mcip->mci_flent->fe_cb_arg2);
207da14cebeSEric Cheng 
208da14cebeSEric Cheng 		(dst_mcip->mci_flent->fe_cb_fn)(dst_mcip->mci_flent->fe_cb_arg1,
209da14cebeSEric Cheng 		    dst_mcip->mci_flent->fe_cb_arg2, mp_chain1, is_loopback);
210da14cebeSEric Cheng 		FLOW_REFRELE(flent);
211da14cebeSEric Cheng 
212da14cebeSEric Cheng 		rw_enter(&mip->mi_rw_lock, RW_READER);
213da14cebeSEric Cheng 
214da14cebeSEric Cheng 		/* update stats */
2150dc2366fSVenugopal Iyer 		if (grp->mbg_addrtype == MAC_ADDRTYPE_MULTICAST) {
2160dc2366fSVenugopal Iyer 			MCIP_STAT_UPDATE(dst_mcip, multircv, 1);
2170dc2366fSVenugopal Iyer 			MCIP_STAT_UPDATE(dst_mcip, multircvbytes,
2180dc2366fSVenugopal Iyer 			    msgdsize(mp_chain));
2190dc2366fSVenugopal Iyer 		} else {
2200dc2366fSVenugopal Iyer 			MCIP_STAT_UPDATE(dst_mcip, brdcstrcv, 1);
2210dc2366fSVenugopal Iyer 			MCIP_STAT_UPDATE(dst_mcip, brdcstrcvbytes,
2220dc2366fSVenugopal Iyer 			    msgdsize(mp_chain));
2230dc2366fSVenugopal Iyer 		}
224da14cebeSEric Cheng 
225da14cebeSEric Cheng 		if (grp->mbg_clients_gen != gen) {
226da14cebeSEric Cheng 			/*
227da14cebeSEric Cheng 			 * The list of MAC clients associated with the group
228da14cebeSEric Cheng 			 * was changed while the lock was released.
229da14cebeSEric Cheng 			 * Give up on the current packet.
230da14cebeSEric Cheng 			 */
231da14cebeSEric Cheng 			rw_exit(&mip->mi_rw_lock);
232da14cebeSEric Cheng 			freemsgchain(mp_chain);
233da14cebeSEric Cheng 			return;
234da14cebeSEric Cheng 		}
235da14cebeSEric Cheng 	}
236da14cebeSEric Cheng 	rw_exit(&mip->mi_rw_lock);
237da14cebeSEric Cheng 
238da14cebeSEric Cheng 	if (src_mcip != NULL) {
239da14cebeSEric Cheng 		/*
240da14cebeSEric Cheng 		 * The packet was sent from one of the MAC clients,
241da14cebeSEric Cheng 		 * so we need to send a copy of the packet to the
242da14cebeSEric Cheng 		 * underlying NIC so that it can be sent on the wire.
243da14cebeSEric Cheng 		 */
2440dc2366fSVenugopal Iyer 		MCIP_STAT_UPDATE(src_mcip, multixmt, 1);
2450dc2366fSVenugopal Iyer 		MCIP_STAT_UPDATE(src_mcip, multixmtbytes, msgdsize(mp_chain));
2460dc2366fSVenugopal Iyer 		MCIP_STAT_UPDATE(src_mcip, brdcstxmt, 1);
2470dc2366fSVenugopal Iyer 		MCIP_STAT_UPDATE(src_mcip, brdcstxmtbytes, msgdsize(mp_chain));
248da14cebeSEric Cheng 
2490dc2366fSVenugopal Iyer 		MAC_TX(mip, mip->mi_default_tx_ring, mp_chain, src_mcip);
2504eaa4710SRishi Srivatsavai 		if (mp_chain != NULL)
2514eaa4710SRishi Srivatsavai 			freemsgchain(mp_chain);
252da14cebeSEric Cheng 	} else {
253da14cebeSEric Cheng 		freemsgchain(mp_chain);
254da14cebeSEric Cheng 	}
255da14cebeSEric Cheng }
256da14cebeSEric Cheng 
257da14cebeSEric Cheng /*
258da14cebeSEric Cheng  * Add the specified MAC client to the group corresponding to the specified
259da14cebeSEric Cheng  * broadcast or multicast address.
260da14cebeSEric Cheng  * Return 0 on success, or an errno value on failure.
261da14cebeSEric Cheng  */
262da14cebeSEric Cheng int
mac_bcast_add(mac_client_impl_t * mcip,const uint8_t * addr,uint16_t vid,mac_addrtype_t addrtype)263da14cebeSEric Cheng mac_bcast_add(mac_client_impl_t *mcip, const uint8_t *addr, uint16_t vid,
264da14cebeSEric Cheng     mac_addrtype_t addrtype)
265da14cebeSEric Cheng {
266da14cebeSEric Cheng 	mac_impl_t 		*mip = mcip->mci_mip;
267da14cebeSEric Cheng 	mac_bcast_grp_t		*grp = NULL, **last_grp;
268da14cebeSEric Cheng 	size_t			addr_len = mip->mi_type->mt_addr_length;
269da14cebeSEric Cheng 	int			rc = 0;
270da14cebeSEric Cheng 	int			i, index = -1;
271ae6aa22aSVenugopal Iyer 	mac_mcast_addrs_t	**prev_mi_addr = NULL;
272ae6aa22aSVenugopal Iyer 	mac_mcast_addrs_t	**prev_mci_addr = NULL;
273da14cebeSEric Cheng 
274da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
275da14cebeSEric Cheng 
276da14cebeSEric Cheng 	ASSERT(addrtype == MAC_ADDRTYPE_MULTICAST ||
277da14cebeSEric Cheng 	    addrtype == MAC_ADDRTYPE_BROADCAST);
278da14cebeSEric Cheng 
279ae6aa22aSVenugopal Iyer 	/*
280ae6aa22aSVenugopal Iyer 	 * Add the MAC client to the list of MAC clients associated
281ae6aa22aSVenugopal Iyer 	 * with the group.
282ae6aa22aSVenugopal Iyer 	 */
283ae6aa22aSVenugopal Iyer 	if (addrtype == MAC_ADDRTYPE_MULTICAST) {
284ae6aa22aSVenugopal Iyer 		mac_mcast_addrs_t	*maddr;
285ae6aa22aSVenugopal Iyer 
286ae6aa22aSVenugopal Iyer 		/*
287ae6aa22aSVenugopal Iyer 		 * In case of a driver (say aggr), we need this information
288ae6aa22aSVenugopal Iyer 		 * on a per MAC instance basis.
289ae6aa22aSVenugopal Iyer 		 */
290ae6aa22aSVenugopal Iyer 		prev_mi_addr = &mip->mi_mcast_addrs;
291ae6aa22aSVenugopal Iyer 		for (maddr = *prev_mi_addr; maddr != NULL;
292ae6aa22aSVenugopal Iyer 		    prev_mi_addr = &maddr->mma_next, maddr = maddr->mma_next) {
293ae6aa22aSVenugopal Iyer 			if (bcmp(maddr->mma_addr, addr, addr_len) == 0)
294ae6aa22aSVenugopal Iyer 				break;
295ae6aa22aSVenugopal Iyer 		}
296ae6aa22aSVenugopal Iyer 		if (maddr == NULL) {
297ae6aa22aSVenugopal Iyer 			/*
298ae6aa22aSVenugopal Iyer 			 * For multicast addresses, have the underlying MAC
299ae6aa22aSVenugopal Iyer 			 * join the corresponding multicast group.
300ae6aa22aSVenugopal Iyer 			 */
301ae6aa22aSVenugopal Iyer 			rc = mip->mi_multicst(mip->mi_driver, B_TRUE, addr);
302ae6aa22aSVenugopal Iyer 			if (rc != 0)
303ae6aa22aSVenugopal Iyer 				return (rc);
304ae6aa22aSVenugopal Iyer 			maddr = kmem_zalloc(sizeof (mac_mcast_addrs_t),
305ae6aa22aSVenugopal Iyer 			    KM_SLEEP);
306ae6aa22aSVenugopal Iyer 			bcopy(addr, maddr->mma_addr, addr_len);
307ae6aa22aSVenugopal Iyer 			*prev_mi_addr = maddr;
308ae6aa22aSVenugopal Iyer 		} else {
309ae6aa22aSVenugopal Iyer 			prev_mi_addr = NULL;
310ae6aa22aSVenugopal Iyer 		}
311ae6aa22aSVenugopal Iyer 		maddr->mma_ref++;
312ae6aa22aSVenugopal Iyer 
313ae6aa22aSVenugopal Iyer 		/*
314ae6aa22aSVenugopal Iyer 		 * We maintain a separate list for each MAC client. Get
315ae6aa22aSVenugopal Iyer 		 * the entry or add, if it is not present.
316ae6aa22aSVenugopal Iyer 		 */
317ae6aa22aSVenugopal Iyer 		prev_mci_addr = &mcip->mci_mcast_addrs;
318ae6aa22aSVenugopal Iyer 		for (maddr = *prev_mci_addr; maddr != NULL;
319ae6aa22aSVenugopal Iyer 		    prev_mci_addr = &maddr->mma_next, maddr = maddr->mma_next) {
320ae6aa22aSVenugopal Iyer 			if (bcmp(maddr->mma_addr, addr, addr_len) == 0)
321ae6aa22aSVenugopal Iyer 				break;
322ae6aa22aSVenugopal Iyer 		}
323ae6aa22aSVenugopal Iyer 		if (maddr == NULL) {
324ae6aa22aSVenugopal Iyer 			maddr = kmem_zalloc(sizeof (mac_mcast_addrs_t),
325ae6aa22aSVenugopal Iyer 			    KM_SLEEP);
326ae6aa22aSVenugopal Iyer 			bcopy(addr, maddr->mma_addr, addr_len);
327ae6aa22aSVenugopal Iyer 			*prev_mci_addr = maddr;
328ae6aa22aSVenugopal Iyer 		} else {
329ae6aa22aSVenugopal Iyer 			prev_mci_addr = NULL;
330ae6aa22aSVenugopal Iyer 		}
331ae6aa22aSVenugopal Iyer 		maddr->mma_ref++;
332ae6aa22aSVenugopal Iyer 	}
333ae6aa22aSVenugopal Iyer 
334da14cebeSEric Cheng 	/* The list is protected by the perimeter */
335da14cebeSEric Cheng 	last_grp = &mip->mi_bcast_grp;
336da14cebeSEric Cheng 	for (grp = *last_grp; grp != NULL;
337da14cebeSEric Cheng 	    last_grp = &grp->mbg_next, grp = grp->mbg_next) {
338da14cebeSEric Cheng 		if (bcmp(grp->mbg_addr, addr, addr_len) == 0 &&
339da14cebeSEric Cheng 		    grp->mbg_vid == vid)
340da14cebeSEric Cheng 			break;
341da14cebeSEric Cheng 	}
342da14cebeSEric Cheng 
343da14cebeSEric Cheng 	if (grp == NULL) {
344da14cebeSEric Cheng 		/*
345da14cebeSEric Cheng 		 * The group does not yet exist, create it.
346da14cebeSEric Cheng 		 */
347da14cebeSEric Cheng 		flow_desc_t flow_desc;
348da000602SGirish Moodalbail 		char flow_name[MAXFLOWNAMELEN];
349da14cebeSEric Cheng 
350da14cebeSEric Cheng 		grp = kmem_cache_alloc(mac_bcast_grp_cache, KM_SLEEP);
351da14cebeSEric Cheng 		bzero(grp, sizeof (mac_bcast_grp_t));
352da14cebeSEric Cheng 		grp->mbg_next = NULL;
353da14cebeSEric Cheng 		grp->mbg_mac_impl = mip;
354da14cebeSEric Cheng 
355da14cebeSEric Cheng 		DTRACE_PROBE1(mac__bcast__add__new__group, mac_bcast_grp_t *,
356da14cebeSEric Cheng 		    grp);
357da14cebeSEric Cheng 
358da14cebeSEric Cheng 		grp->mbg_addr = kmem_zalloc(addr_len, KM_SLEEP);
359da14cebeSEric Cheng 		bcopy(addr, grp->mbg_addr, addr_len);
360da14cebeSEric Cheng 		grp->mbg_addrtype = addrtype;
361da14cebeSEric Cheng 		grp->mbg_vid = vid;
362da14cebeSEric Cheng 
363da14cebeSEric Cheng 		/*
364da14cebeSEric Cheng 		 * Add a new flow to the underlying MAC.
365da14cebeSEric Cheng 		 */
366da14cebeSEric Cheng 		bzero(&flow_desc, sizeof (flow_desc));
367da14cebeSEric Cheng 		bcopy(addr, &flow_desc.fd_dst_mac, addr_len);
368da14cebeSEric Cheng 		flow_desc.fd_mac_len = (uint32_t)addr_len;
369da14cebeSEric Cheng 
370da14cebeSEric Cheng 		flow_desc.fd_mask = FLOW_LINK_DST;
371da14cebeSEric Cheng 		if (vid != 0) {
372da14cebeSEric Cheng 			flow_desc.fd_vid = vid;
373da14cebeSEric Cheng 			flow_desc.fd_mask |= FLOW_LINK_VID;
374da14cebeSEric Cheng 		}
375da14cebeSEric Cheng 
376*1a5e258fSJosef 'Jeff' Sipek 		grp->mbg_id = atomic_inc_32_nv(&mac_bcast_id);
377da14cebeSEric Cheng 		(void) sprintf(flow_name,
378da14cebeSEric Cheng 		    "mac/%s/mcast%d", mip->mi_name, grp->mbg_id);
379da14cebeSEric Cheng 
380da14cebeSEric Cheng 		rc = mac_flow_create(&flow_desc, NULL, flow_name,
381da14cebeSEric Cheng 		    grp, FLOW_MCAST, &grp->mbg_flow_ent);
382da14cebeSEric Cheng 		if (rc != 0) {
383da14cebeSEric Cheng 			kmem_free(grp->mbg_addr, addr_len);
384da14cebeSEric Cheng 			kmem_cache_free(mac_bcast_grp_cache, grp);
385ae6aa22aSVenugopal Iyer 			goto fail;
386da14cebeSEric Cheng 		}
387da14cebeSEric Cheng 		grp->mbg_flow_ent->fe_mbg = grp;
388da14cebeSEric Cheng 		mip->mi_bcast_ngrps++;
389da14cebeSEric Cheng 
390da14cebeSEric Cheng 		/*
391da14cebeSEric Cheng 		 * Initial creation reference on the flow. This is released
392da14cebeSEric Cheng 		 * in the corresponding delete action i_mac_bcast_delete()
393da14cebeSEric Cheng 		 */
394da14cebeSEric Cheng 		FLOW_REFHOLD(grp->mbg_flow_ent);
395da14cebeSEric Cheng 
396da14cebeSEric Cheng 		/*
397da14cebeSEric Cheng 		 * When the multicast and broadcast packet is received
398da14cebeSEric Cheng 		 * by the underlying NIC, mac_rx_classify() will invoke
399da14cebeSEric Cheng 		 * mac_bcast_send() with arg2=NULL, which will cause
400da14cebeSEric Cheng 		 * mac_bcast_send() to send a copy of the packet(s)
401da14cebeSEric Cheng 		 * to every MAC client opened on top of the underlying MAC.
402da14cebeSEric Cheng 		 *
403da14cebeSEric Cheng 		 * When the mac_bcast_send() function is invoked from
404da14cebeSEric Cheng 		 * the transmit path of a MAC client, it will specify the
405da14cebeSEric Cheng 		 * transmitting MAC client as the arg2 value, which will
406da14cebeSEric Cheng 		 * allow mac_bcast_send() to skip that MAC client and not
407da14cebeSEric Cheng 		 * send it a copy of the packet.
408da14cebeSEric Cheng 		 *
409da14cebeSEric Cheng 		 * We program the classifier to dispatch matching broadcast
410da14cebeSEric Cheng 		 * packets to mac_bcast_send().
411da14cebeSEric Cheng 		 */
412da14cebeSEric Cheng 
413da14cebeSEric Cheng 		grp->mbg_flow_ent->fe_cb_fn = mac_bcast_send;
414da14cebeSEric Cheng 		grp->mbg_flow_ent->fe_cb_arg1 = grp;
415da14cebeSEric Cheng 		grp->mbg_flow_ent->fe_cb_arg2 = NULL;
416da14cebeSEric Cheng 
417da14cebeSEric Cheng 		rc = mac_flow_add(mip->mi_flow_tab, grp->mbg_flow_ent);
418da14cebeSEric Cheng 		if (rc != 0) {
419da14cebeSEric Cheng 			FLOW_FINAL_REFRELE(grp->mbg_flow_ent);
420ae6aa22aSVenugopal Iyer 			goto fail;
421da14cebeSEric Cheng 		}
422da14cebeSEric Cheng 
423da14cebeSEric Cheng 		*last_grp = grp;
424da14cebeSEric Cheng 	}
425da14cebeSEric Cheng 
426da14cebeSEric Cheng 	ASSERT(grp->mbg_addrtype == addrtype);
427da14cebeSEric Cheng 
428da14cebeSEric Cheng 	/*
429da14cebeSEric Cheng 	 * Add the MAC client to the list of MAC clients associated
430da14cebeSEric Cheng 	 * with the group.
431da14cebeSEric Cheng 	 */
432da14cebeSEric Cheng 	rw_enter(&mip->mi_rw_lock, RW_WRITER);
433da14cebeSEric Cheng 	for (i = 0; i < grp->mbg_nclients_alloc; i++) {
434da14cebeSEric Cheng 		/*
435da14cebeSEric Cheng 		 * The MAC client was already added, say when we have
436da14cebeSEric Cheng 		 * different unicast addresses with the same vid.
437da14cebeSEric Cheng 		 * Just increment the ref and we are done.
438da14cebeSEric Cheng 		 */
439da14cebeSEric Cheng 		if (grp->mbg_clients[i].mgb_client == mcip) {
440da14cebeSEric Cheng 			grp->mbg_clients[i].mgb_client_ref++;
441ae6aa22aSVenugopal Iyer 			rw_exit(&mip->mi_rw_lock);
442ae6aa22aSVenugopal Iyer 			return (0);
443da14cebeSEric Cheng 		} else if (grp->mbg_clients[i].mgb_client == NULL &&
444da14cebeSEric Cheng 		    index == -1) {
445da14cebeSEric Cheng 			index = i;
446da14cebeSEric Cheng 		}
447da14cebeSEric Cheng 	}
448da14cebeSEric Cheng 	if (grp->mbg_nclients_alloc == grp->mbg_nclients) {
449da14cebeSEric Cheng 		mac_bcast_grp_mcip_t	*new_clients;
450da14cebeSEric Cheng 		uint_t			new_size = grp->mbg_nclients+1;
451da14cebeSEric Cheng 
452da14cebeSEric Cheng 		new_clients = kmem_zalloc(new_size *
453da14cebeSEric Cheng 		    sizeof (mac_bcast_grp_mcip_t), KM_SLEEP);
454da14cebeSEric Cheng 
455da14cebeSEric Cheng 		if (grp->mbg_nclients > 0) {
456da14cebeSEric Cheng 			ASSERT(grp->mbg_clients != NULL);
457da14cebeSEric Cheng 			bcopy(grp->mbg_clients, new_clients, grp->mbg_nclients *
458da14cebeSEric Cheng 			    sizeof (mac_bcast_grp_mcip_t));
459da14cebeSEric Cheng 			kmem_free(grp->mbg_clients, grp->mbg_nclients *
460da14cebeSEric Cheng 			    sizeof (mac_bcast_grp_mcip_t));
461da14cebeSEric Cheng 		}
462da14cebeSEric Cheng 
463da14cebeSEric Cheng 		grp->mbg_clients = new_clients;
464da14cebeSEric Cheng 		grp->mbg_nclients_alloc = new_size;
465da14cebeSEric Cheng 		index = new_size - 1;
466da14cebeSEric Cheng 	}
467da14cebeSEric Cheng 
468da14cebeSEric Cheng 	ASSERT(index != -1);
469da14cebeSEric Cheng 	grp->mbg_clients[index].mgb_client = mcip;
470da14cebeSEric Cheng 	grp->mbg_clients[index].mgb_client_ref = 1;
471da14cebeSEric Cheng 	grp->mbg_nclients++;
472da14cebeSEric Cheng 	/*
473da14cebeSEric Cheng 	 * Since we're adding to the list of MAC clients using that group,
474da14cebeSEric Cheng 	 * kick the generation count, which will allow mac_bcast_send()
475da14cebeSEric Cheng 	 * to detect that condition after re-acquiring the lock.
476da14cebeSEric Cheng 	 */
477da14cebeSEric Cheng 	grp->mbg_clients_gen++;
478da14cebeSEric Cheng 	rw_exit(&mip->mi_rw_lock);
479da14cebeSEric Cheng 	return (0);
480ae6aa22aSVenugopal Iyer 
481ae6aa22aSVenugopal Iyer fail:
482ae6aa22aSVenugopal Iyer 	if (prev_mi_addr != NULL) {
483ae6aa22aSVenugopal Iyer 		kmem_free(*prev_mi_addr, sizeof (mac_mcast_addrs_t));
484ae6aa22aSVenugopal Iyer 		*prev_mi_addr = NULL;
485ae6aa22aSVenugopal Iyer 		(void) mip->mi_multicst(mip->mi_driver, B_FALSE, addr);
486ae6aa22aSVenugopal Iyer 	}
487ae6aa22aSVenugopal Iyer 	if (prev_mci_addr != NULL) {
488ae6aa22aSVenugopal Iyer 		kmem_free(*prev_mci_addr, sizeof (mac_mcast_addrs_t));
489ae6aa22aSVenugopal Iyer 		*prev_mci_addr = NULL;
490ae6aa22aSVenugopal Iyer 	}
491ae6aa22aSVenugopal Iyer 	return (rc);
492da14cebeSEric Cheng }
493da14cebeSEric Cheng 
494da14cebeSEric Cheng /*
495da14cebeSEric Cheng  * Remove the specified MAC client from the group corresponding to
496da14cebeSEric Cheng  * the specific broadcast or multicast address.
497da14cebeSEric Cheng  *
498da14cebeSEric Cheng  * Note: mac_bcast_delete() calls  mac_remove_flow() which
499da14cebeSEric Cheng  * will call cv_wait for fe_refcnt to drop to 0. So this function
500da14cebeSEric Cheng  * should not be called from interrupt or STREAMS context.
501da14cebeSEric Cheng  */
502da14cebeSEric Cheng void
mac_bcast_delete(mac_client_impl_t * mcip,const uint8_t * addr,uint16_t vid)503da14cebeSEric Cheng mac_bcast_delete(mac_client_impl_t *mcip, const uint8_t *addr, uint16_t vid)
504da14cebeSEric Cheng {
505da14cebeSEric Cheng 	mac_impl_t *mip = mcip->mci_mip;
506da14cebeSEric Cheng 	mac_bcast_grp_t *grp = NULL, **prev;
507da14cebeSEric Cheng 	size_t addr_len = mip->mi_type->mt_addr_length;
508da14cebeSEric Cheng 	flow_entry_t *flent;
509da14cebeSEric Cheng 	uint_t i;
510da14cebeSEric Cheng 	mac_mcast_addrs_t	*maddr = NULL;
511da14cebeSEric Cheng 	mac_mcast_addrs_t	**mprev;
512da14cebeSEric Cheng 
513da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
514da14cebeSEric Cheng 
515da14cebeSEric Cheng 	/* find the broadcast group. The list is protected by the perimeter */
516da14cebeSEric Cheng 	prev = &mip->mi_bcast_grp;
517da14cebeSEric Cheng 	for (grp = mip->mi_bcast_grp; grp != NULL; prev = &grp->mbg_next,
518da14cebeSEric Cheng 	    grp = grp->mbg_next) {
519da14cebeSEric Cheng 		if (bcmp(grp->mbg_addr, addr, addr_len) == 0 &&
520da14cebeSEric Cheng 		    grp->mbg_vid == vid)
521da14cebeSEric Cheng 			break;
522da14cebeSEric Cheng 	}
523da14cebeSEric Cheng 	ASSERT(grp != NULL);
524da14cebeSEric Cheng 
525da14cebeSEric Cheng 	/*
526da14cebeSEric Cheng 	 * Remove the MAC client from the list of MAC clients associated
527da14cebeSEric Cheng 	 * with that broadcast group.
528da14cebeSEric Cheng 	 *
529da14cebeSEric Cheng 	 * We mark the mbg_clients[] location corresponding to the removed MAC
530da14cebeSEric Cheng 	 * client NULL and reuse that location when we add a new MAC client.
531da14cebeSEric Cheng 	 */
532da14cebeSEric Cheng 
533da14cebeSEric Cheng 	rw_enter(&mip->mi_rw_lock, RW_WRITER);
534da14cebeSEric Cheng 
535da14cebeSEric Cheng 	for (i = 0; i < grp->mbg_nclients_alloc; i++) {
536da14cebeSEric Cheng 		if (grp->mbg_clients[i].mgb_client == mcip)
537da14cebeSEric Cheng 			break;
538da14cebeSEric Cheng 	}
539da14cebeSEric Cheng 
540da14cebeSEric Cheng 	ASSERT(i < grp->mbg_nclients_alloc);
541da14cebeSEric Cheng 	/*
542da14cebeSEric Cheng 	 * If there are more references to this MAC client, then we let
543da14cebeSEric Cheng 	 * it remain till it goes to 0.
544da14cebeSEric Cheng 	 */
545da14cebeSEric Cheng 	if (--grp->mbg_clients[i].mgb_client_ref > 0)
546da14cebeSEric Cheng 		goto update_maddr;
547da14cebeSEric Cheng 
548da14cebeSEric Cheng 	grp->mbg_clients[i].mgb_client = NULL;
549da14cebeSEric Cheng 	grp->mbg_clients[i].mgb_client_ref = 0;
550da14cebeSEric Cheng 
551da14cebeSEric Cheng 	/*
552da14cebeSEric Cheng 	 * Since we're removing from the list of MAC clients using that group,
553da14cebeSEric Cheng 	 * kick the generation count, which will allow mac_bcast_send()
554da14cebeSEric Cheng 	 * to detect that condition.
555da14cebeSEric Cheng 	 */
556da14cebeSEric Cheng 	grp->mbg_clients_gen++;
557da14cebeSEric Cheng 
558da14cebeSEric Cheng 	if (--grp->mbg_nclients == 0) {
559da14cebeSEric Cheng 		/*
560da14cebeSEric Cheng 		 * The last MAC client of the group was just removed.
561da14cebeSEric Cheng 		 * Unlink the current group from the list of groups
562da14cebeSEric Cheng 		 * defined on top of the underlying NIC. The group
563da14cebeSEric Cheng 		 * structure will stay around until the last reference
564da14cebeSEric Cheng 		 * is dropped.
565da14cebeSEric Cheng 		 */
566da14cebeSEric Cheng 		*prev = grp->mbg_next;
567da14cebeSEric Cheng 	}
568da14cebeSEric Cheng update_maddr:
569ae6aa22aSVenugopal Iyer 	rw_exit(&mip->mi_rw_lock);
570ae6aa22aSVenugopal Iyer 
571da14cebeSEric Cheng 	if (grp->mbg_addrtype == MAC_ADDRTYPE_MULTICAST) {
572da14cebeSEric Cheng 		mprev = &mcip->mci_mcast_addrs;
573da14cebeSEric Cheng 		for (maddr = mcip->mci_mcast_addrs; maddr != NULL;
574da14cebeSEric Cheng 		    mprev = &maddr->mma_next, maddr = maddr->mma_next) {
575da14cebeSEric Cheng 			if (bcmp(grp->mbg_addr, maddr->mma_addr,
576da14cebeSEric Cheng 			    mip->mi_type->mt_addr_length) == 0)
577da14cebeSEric Cheng 				break;
578da14cebeSEric Cheng 		}
579da14cebeSEric Cheng 		ASSERT(maddr != NULL);
580da14cebeSEric Cheng 		if (--maddr->mma_ref == 0) {
581da14cebeSEric Cheng 			*mprev = maddr->mma_next;
582da14cebeSEric Cheng 			maddr->mma_next = NULL;
583da14cebeSEric Cheng 			kmem_free(maddr, sizeof (mac_mcast_addrs_t));
584da14cebeSEric Cheng 		}
585da14cebeSEric Cheng 
586da14cebeSEric Cheng 		mprev = &mip->mi_mcast_addrs;
587da14cebeSEric Cheng 		for (maddr = mip->mi_mcast_addrs; maddr != NULL;
588da14cebeSEric Cheng 		    mprev = &maddr->mma_next, maddr = maddr->mma_next) {
589da14cebeSEric Cheng 			if (bcmp(grp->mbg_addr, maddr->mma_addr,
590da14cebeSEric Cheng 			    mip->mi_type->mt_addr_length) == 0)
591da14cebeSEric Cheng 				break;
592da14cebeSEric Cheng 		}
593da14cebeSEric Cheng 		ASSERT(maddr != NULL);
594da14cebeSEric Cheng 		if (--maddr->mma_ref == 0) {
595ae6aa22aSVenugopal Iyer 			(void) mip->mi_multicst(mip->mi_driver, B_FALSE, addr);
596da14cebeSEric Cheng 			*mprev = maddr->mma_next;
597da14cebeSEric Cheng 			maddr->mma_next = NULL;
598da14cebeSEric Cheng 			kmem_free(maddr, sizeof (mac_mcast_addrs_t));
599da14cebeSEric Cheng 		}
600da14cebeSEric Cheng 	}
601da14cebeSEric Cheng 
602da14cebeSEric Cheng 	/*
603da14cebeSEric Cheng 	 * If the group itself is being removed, remove the
604da14cebeSEric Cheng 	 * corresponding flow from the underlying NIC.
605da14cebeSEric Cheng 	 */
606da14cebeSEric Cheng 	flent = grp->mbg_flow_ent;
607da14cebeSEric Cheng 	if (grp->mbg_nclients == 0) {
608da14cebeSEric Cheng 		mac_flow_remove(mip->mi_flow_tab, flent, B_FALSE);
609da14cebeSEric Cheng 		mac_flow_wait(flent, FLOW_DRIVER_UPCALL);
610da14cebeSEric Cheng 		FLOW_FINAL_REFRELE(flent);
611da14cebeSEric Cheng 	}
612da14cebeSEric Cheng }
613da14cebeSEric Cheng 
614da14cebeSEric Cheng /*
615da14cebeSEric Cheng  * This will be called by a driver, such as aggr, when a port is added/removed
616da14cebeSEric Cheng  * to add/remove the port to/from all the multcast addresses for that aggr.
617da14cebeSEric Cheng  */
618da14cebeSEric Cheng void
mac_bcast_refresh(mac_impl_t * mip,mac_multicst_t refresh_fn,void * arg,boolean_t add)619da14cebeSEric Cheng mac_bcast_refresh(mac_impl_t *mip, mac_multicst_t refresh_fn, void *arg,
620da14cebeSEric Cheng     boolean_t add)
621da14cebeSEric Cheng {
622da14cebeSEric Cheng 	mac_mcast_addrs_t *grp, *next;
623da14cebeSEric Cheng 
624da14cebeSEric Cheng 	ASSERT(refresh_fn != NULL);
625da14cebeSEric Cheng 
626da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
627da14cebeSEric Cheng 
628da14cebeSEric Cheng 	/*
629da14cebeSEric Cheng 	 * Walk the multicast address list and call the refresh function for
630da14cebeSEric Cheng 	 * each address.
631da14cebeSEric Cheng 	 */
632da14cebeSEric Cheng 
633da14cebeSEric Cheng 	for (grp = mip->mi_mcast_addrs; grp != NULL; grp = next) {
634da14cebeSEric Cheng 		/*
635da14cebeSEric Cheng 		 * Save the next pointer just in case the refresh
636da14cebeSEric Cheng 		 * function's action causes the group entry to be
637da14cebeSEric Cheng 		 * freed.
638da14cebeSEric Cheng 		 * We won't be adding to this list as part of the
639da14cebeSEric Cheng 		 * refresh.
640da14cebeSEric Cheng 		 */
641da14cebeSEric Cheng 		next = grp->mma_next;
642da14cebeSEric Cheng 		refresh_fn(arg, add, grp->mma_addr);
643da14cebeSEric Cheng 	}
644da14cebeSEric Cheng }
645da14cebeSEric Cheng 
646da14cebeSEric Cheng /*
647da14cebeSEric Cheng  * Walk the MAC client's multicast address list and add/remove the addr/vid
648da14cebeSEric Cheng  * ('arg' is 'flent') to all the addresses.
649da14cebeSEric Cheng  */
650da14cebeSEric Cheng void
mac_client_bcast_refresh(mac_client_impl_t * mcip,mac_multicst_t refresh_fn,void * arg,boolean_t add)651da14cebeSEric Cheng mac_client_bcast_refresh(mac_client_impl_t *mcip, mac_multicst_t refresh_fn,
652da14cebeSEric Cheng     void *arg, boolean_t add)
653da14cebeSEric Cheng {
654da14cebeSEric Cheng 	mac_mcast_addrs_t *grp, *next;
655da14cebeSEric Cheng 	mac_impl_t		*mip = mcip->mci_mip;
656da14cebeSEric Cheng 
657da14cebeSEric Cheng 	ASSERT(refresh_fn != NULL);
658da14cebeSEric Cheng 
659da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
660da14cebeSEric Cheng 	/*
661da14cebeSEric Cheng 	 * Walk the multicast address list and call the refresh function for
662da14cebeSEric Cheng 	 * each address.
663da14cebeSEric Cheng 	 * Broadcast addresses are not added or removed through the multicast
664da14cebeSEric Cheng 	 * entry points, so don't include them as part of the refresh.
665da14cebeSEric Cheng 	 */
666da14cebeSEric Cheng 	for (grp = mcip->mci_mcast_addrs; grp != NULL; grp = next) {
667da14cebeSEric Cheng 		/*
668da14cebeSEric Cheng 		 * Save the next pointer just in case the refresh
669da14cebeSEric Cheng 		 * function's action causes the group entry to be
670da14cebeSEric Cheng 		 * freed.
671da14cebeSEric Cheng 		 * We won't be adding to this list as part of the
672da14cebeSEric Cheng 		 * refresh.
673da14cebeSEric Cheng 		 */
674da14cebeSEric Cheng 		next = grp->mma_next;
675da14cebeSEric Cheng 		refresh_fn(arg, add, grp->mma_addr);
676da14cebeSEric Cheng 	}
677da14cebeSEric Cheng }
678