xref: /titanic_41/usr/src/uts/common/io/aggr/aggr_grp.c (revision f8bfcf82efef2b9c66b9fe713389124a35fb1ff1)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5f12af565Snd99603  * Common Development and Distribution License (the "License").
6f12af565Snd99603  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
220591ddd0SPrakash Jalan  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23*f8bfcf82SRobert Mustacchi  * Copyright 2015 Joyent, Inc.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
/*
 * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups.
 *
 * An instance of the structure aggr_grp_t is allocated for each
 * link aggregation group. When created, aggr_grp_t objects are
 * entered into the aggr_grp_hash hash table maintained by the modhash
 * module. The hash key is the linkid associated with the link
 * aggregation group.
 *
 * A set of MAC ports is associated with each aggregation group.
 *
 * Aggr pseudo TX rings
 * --------------------
 * The underlying ports (NICs) in an aggregation can have TX rings. To
 * enhance aggr's performance, these TX rings are made available to the
 * aggr layer as pseudo TX rings. The concept of pseudo rings is not new:
 * it is already present and implemented on the RX side, where they are
 * called pseudo RX rings. The same concept is extended to the TX side,
 * where each TX ring of an underlying port is reflected in aggr as a
 * pseudo TX ring. Thus each pseudo TX ring will map to a specific hardware
 * TX ring. Even in the case of a NIC that does not have a TX ring, a pseudo
 * TX ring is given to the aggregation layer.
 *
 * With this change, the outgoing stack depth looks much better:
 *
 * mac_tx() -> mac_tx_aggr_mode() -> mac_tx_soft_ring_process() ->
 * mac_tx_send() -> aggr_ring_tx() -> <driver>_ring_tx()
 *
 * Two new modes are introduced to mac_tx() to handle aggr pseudo TX rings:
 * SRS_TX_AGGR and SRS_TX_BW_AGGR.
 *
 * In SRS_TX_AGGR mode, the mac_tx_aggr_mode() routine is called. This
 * routine invokes an aggr function, aggr_find_tx_ring(), to find a (pseudo)
 * TX ring belonging to a port on which the packet has to be sent.
 * aggr_find_tx_ring() first finds the outgoing port based on L2/L3/L4
 * policy and then uses the fanout_hint passed to it to pick a TX ring from
 * the selected port.
 *
 * In SRS_TX_BW_AGGR mode, the mac_tx_bw_mode() function is called, where
 * the bandwidth limit is applied first on the outgoing packet, and the
 * packets allowed to go out then call mac_tx_aggr_mode() to send the packet
 * on a particular TX ring.
 */
697c478bd9Sstevel@tonic-gate 
707c478bd9Sstevel@tonic-gate #include <sys/types.h>
717c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
727c478bd9Sstevel@tonic-gate #include <sys/conf.h>
737c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
74da14cebeSEric Cheng #include <sys/disp.h>
757c478bd9Sstevel@tonic-gate #include <sys/list.h>
767c478bd9Sstevel@tonic-gate #include <sys/ksynch.h>
777c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
787c478bd9Sstevel@tonic-gate #include <sys/stream.h>
797c478bd9Sstevel@tonic-gate #include <sys/modctl.h>
807c478bd9Sstevel@tonic-gate #include <sys/ddi.h>
817c478bd9Sstevel@tonic-gate #include <sys/sunddi.h>
827c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
837c478bd9Sstevel@tonic-gate #include <sys/stat.h>
84210db224Sericheng #include <sys/modhash.h>
85d62bc4baSyz147064 #include <sys/id_space.h>
867c478bd9Sstevel@tonic-gate #include <sys/strsun.h>
872b24ab6bSSebastien Roy #include <sys/cred.h>
887c478bd9Sstevel@tonic-gate #include <sys/dlpi.h>
892b24ab6bSSebastien Roy #include <sys/zone.h>
90da14cebeSEric Cheng #include <sys/mac_provider.h>
91d62bc4baSyz147064 #include <sys/dls.h>
92d62bc4baSyz147064 #include <sys/vlan.h>
937c478bd9Sstevel@tonic-gate #include <sys/aggr.h>
947c478bd9Sstevel@tonic-gate #include <sys/aggr_impl.h>
957c478bd9Sstevel@tonic-gate 
967c478bd9Sstevel@tonic-gate static int aggr_m_start(void *);
977c478bd9Sstevel@tonic-gate static void aggr_m_stop(void *);
987c478bd9Sstevel@tonic-gate static int aggr_m_promisc(void *, boolean_t);
997c478bd9Sstevel@tonic-gate static int aggr_m_multicst(void *, boolean_t, const uint8_t *);
1007c478bd9Sstevel@tonic-gate static int aggr_m_unicst(void *, const uint8_t *);
101ba2e4443Sseb static int aggr_m_stat(void *, uint_t, uint64_t *);
1027c478bd9Sstevel@tonic-gate static void aggr_m_ioctl(void *, queue_t *, mblk_t *);
103ba2e4443Sseb static boolean_t aggr_m_capab_get(void *, mac_capab_t, void *);
104986cab2cSGirish Moodalbail static int aggr_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
105986cab2cSGirish Moodalbail     const void *);
1060dc2366fSVenugopal Iyer static void aggr_m_propinfo(void *, const char *, mac_prop_id_t,
1070dc2366fSVenugopal Iyer     mac_prop_info_handle_t);
108986cab2cSGirish Moodalbail 
109d62bc4baSyz147064 static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, datalink_id_t);
1104deae11aSyz147064 static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *,
1114deae11aSyz147064     boolean_t *);
112d62bc4baSyz147064 
1137c478bd9Sstevel@tonic-gate static void aggr_grp_capab_set(aggr_grp_t *);
1147c478bd9Sstevel@tonic-gate static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *);
115f4420ae7Snd99603 static uint_t aggr_grp_max_sdu(aggr_grp_t *);
116d62bc4baSyz147064 static uint32_t aggr_grp_max_margin(aggr_grp_t *);
117f4420ae7Snd99603 static boolean_t aggr_grp_sdu_check(aggr_grp_t *, aggr_port_t *);
118d62bc4baSyz147064 static boolean_t aggr_grp_margin_check(aggr_grp_t *, aggr_port_t *);
119da14cebeSEric Cheng 
120da14cebeSEric Cheng static int aggr_add_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
121da14cebeSEric Cheng static void aggr_rem_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
122da14cebeSEric Cheng static int aggr_pseudo_disable_intr(mac_intr_handle_t);
123da14cebeSEric Cheng static int aggr_pseudo_enable_intr(mac_intr_handle_t);
124da14cebeSEric Cheng static int aggr_pseudo_start_ring(mac_ring_driver_t, uint64_t);
125da14cebeSEric Cheng static void aggr_pseudo_stop_ring(mac_ring_driver_t);
126da14cebeSEric Cheng static int aggr_addmac(void *, const uint8_t *);
127da14cebeSEric Cheng static int aggr_remmac(void *, const uint8_t *);
128da14cebeSEric Cheng static mblk_t *aggr_rx_poll(void *, int);
129da14cebeSEric Cheng static void aggr_fill_ring(void *, mac_ring_type_t, const int,
130da14cebeSEric Cheng     const int, mac_ring_info_t *, mac_ring_handle_t);
131da14cebeSEric Cheng static void aggr_fill_group(void *, mac_ring_type_t, const int,
132da14cebeSEric Cheng     mac_group_info_t *, mac_group_handle_t);
1337c478bd9Sstevel@tonic-gate 
/* kmem cache from which aggr_grp_t objects are allocated. */
static kmem_cache_t	*aggr_grp_cache;
/* Hash table of link aggregation groups; the key is the group's linkid. */
static mod_hash_t	*aggr_grp_hash;
/* Taken as reader by aggr_grp_count() while it samples aggr_grp_cnt. */
static krwlock_t	aggr_grp_lock;
/* Group counter reported by aggr_grp_count(). */
static uint_t		aggr_grp_cnt;
/* Id space used to hand out key values when no key is specified. */
static id_space_t	*key_ids;

/* Number of buckets requested for aggr_grp_hash. */
#define	GRP_HASHSZ		64
/* Convert a linkid into the key type expected by the modhash module. */
#define	GRP_HASH_KEY(linkid)	((mod_hash_key_t)(uintptr_t)linkid)
#define	AGGR_PORT_NAME_DELIMIT '-'

/* All-zero Ethernet address (six bytes). */
static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0};

/*
 * Flags naming the optional callbacks that aggr_m_callbacks provides:
 * ioctl, capability query, property set and property info.
 */
#define	AGGR_M_CALLBACK_FLAGS	\
	(MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO)

/*
 * MAC callback vector for the aggregation pseudo device. The initializer
 * is positional, so the order of the entries must not change. NULL entries
 * are callbacks this file does not supply.
 * NOTE(review): the mac_callbacks_t field names are declared outside this
 * file -- confirm the slot each NULL occupies against the MAC provider
 * header before editing.
 */
static mac_callbacks_t aggr_m_callbacks = {
	AGGR_M_CALLBACK_FLAGS,
	aggr_m_stat,
	aggr_m_start,
	aggr_m_stop,
	aggr_m_promisc,
	aggr_m_multicst,
	NULL,
	NULL,
	NULL,
	aggr_m_ioctl,
	aggr_m_capab_get,
	NULL,
	NULL,
	aggr_m_setprop,
	NULL,
	aggr_m_propinfo
};
167ba2e4443Sseb 
1687c478bd9Sstevel@tonic-gate /*ARGSUSED*/
1697c478bd9Sstevel@tonic-gate static int
aggr_grp_constructor(void * buf,void * arg,int kmflag)1707c478bd9Sstevel@tonic-gate aggr_grp_constructor(void *buf, void *arg, int kmflag)
1717c478bd9Sstevel@tonic-gate {
1727c478bd9Sstevel@tonic-gate 	aggr_grp_t *grp = buf;
1737c478bd9Sstevel@tonic-gate 
1747c478bd9Sstevel@tonic-gate 	bzero(grp, sizeof (*grp));
175da14cebeSEric Cheng 	mutex_init(&grp->lg_lacp_lock, NULL, MUTEX_DEFAULT, NULL);
176da14cebeSEric Cheng 	cv_init(&grp->lg_lacp_cv, NULL, CV_DEFAULT, NULL);
177da14cebeSEric Cheng 	rw_init(&grp->lg_tx_lock, NULL, RW_DRIVER, NULL);
178da14cebeSEric Cheng 	mutex_init(&grp->lg_port_lock, NULL, MUTEX_DEFAULT, NULL);
179da14cebeSEric Cheng 	cv_init(&grp->lg_port_cv, NULL, CV_DEFAULT, NULL);
1800dc2366fSVenugopal Iyer 	mutex_init(&grp->lg_tx_flowctl_lock, NULL, MUTEX_DEFAULT, NULL);
1810dc2366fSVenugopal Iyer 	cv_init(&grp->lg_tx_flowctl_cv, NULL, CV_DEFAULT, NULL);
1827c478bd9Sstevel@tonic-gate 	grp->lg_link_state = LINK_STATE_UNKNOWN;
1837c478bd9Sstevel@tonic-gate 	return (0);
1847c478bd9Sstevel@tonic-gate }
1857c478bd9Sstevel@tonic-gate 
1867c478bd9Sstevel@tonic-gate /*ARGSUSED*/
1877c478bd9Sstevel@tonic-gate static void
aggr_grp_destructor(void * buf,void * arg)1887c478bd9Sstevel@tonic-gate aggr_grp_destructor(void *buf, void *arg)
1897c478bd9Sstevel@tonic-gate {
1907c478bd9Sstevel@tonic-gate 	aggr_grp_t *grp = buf;
1917c478bd9Sstevel@tonic-gate 
1927c478bd9Sstevel@tonic-gate 	if (grp->lg_tx_ports != NULL) {
1937c478bd9Sstevel@tonic-gate 		kmem_free(grp->lg_tx_ports,
1947c478bd9Sstevel@tonic-gate 		    grp->lg_tx_ports_size * sizeof (aggr_port_t *));
1957c478bd9Sstevel@tonic-gate 	}
1967c478bd9Sstevel@tonic-gate 
197da14cebeSEric Cheng 	mutex_destroy(&grp->lg_lacp_lock);
198da14cebeSEric Cheng 	cv_destroy(&grp->lg_lacp_cv);
199da14cebeSEric Cheng 	mutex_destroy(&grp->lg_port_lock);
200da14cebeSEric Cheng 	cv_destroy(&grp->lg_port_cv);
201da14cebeSEric Cheng 	rw_destroy(&grp->lg_tx_lock);
2020dc2366fSVenugopal Iyer 	mutex_destroy(&grp->lg_tx_flowctl_lock);
2030dc2366fSVenugopal Iyer 	cv_destroy(&grp->lg_tx_flowctl_cv);
2047c478bd9Sstevel@tonic-gate }
2057c478bd9Sstevel@tonic-gate 
2067c478bd9Sstevel@tonic-gate void
aggr_grp_init(void)2077c478bd9Sstevel@tonic-gate aggr_grp_init(void)
2087c478bd9Sstevel@tonic-gate {
2097c478bd9Sstevel@tonic-gate 	aggr_grp_cache = kmem_cache_create("aggr_grp_cache",
2107c478bd9Sstevel@tonic-gate 	    sizeof (aggr_grp_t), 0, aggr_grp_constructor,
2117c478bd9Sstevel@tonic-gate 	    aggr_grp_destructor, NULL, NULL, NULL, 0);
2127c478bd9Sstevel@tonic-gate 
213210db224Sericheng 	aggr_grp_hash = mod_hash_create_idhash("aggr_grp_hash",
214210db224Sericheng 	    GRP_HASHSZ, mod_hash_null_valdtor);
215210db224Sericheng 	rw_init(&aggr_grp_lock, NULL, RW_DEFAULT, NULL);
216210db224Sericheng 	aggr_grp_cnt = 0;
217d62bc4baSyz147064 
218d62bc4baSyz147064 	/*
219d62bc4baSyz147064 	 * Allocate an id space to manage key values (when key is not
220d62bc4baSyz147064 	 * specified). The range of the id space will be from
221d62bc4baSyz147064 	 * (AGGR_MAX_KEY + 1) to UINT16_MAX, because the LACP protocol
222d62bc4baSyz147064 	 * uses a 16-bit key.
223d62bc4baSyz147064 	 */
224d62bc4baSyz147064 	key_ids = id_space_create("aggr_key_ids", AGGR_MAX_KEY + 1, UINT16_MAX);
225d62bc4baSyz147064 	ASSERT(key_ids != NULL);
2267c478bd9Sstevel@tonic-gate }
2277c478bd9Sstevel@tonic-gate 
228c0192a57Sericheng void
aggr_grp_fini(void)2297c478bd9Sstevel@tonic-gate aggr_grp_fini(void)
2307c478bd9Sstevel@tonic-gate {
231d62bc4baSyz147064 	id_space_destroy(key_ids);
232210db224Sericheng 	rw_destroy(&aggr_grp_lock);
233210db224Sericheng 	mod_hash_destroy_idhash(aggr_grp_hash);
2347c478bd9Sstevel@tonic-gate 	kmem_cache_destroy(aggr_grp_cache);
2357c478bd9Sstevel@tonic-gate }
2367c478bd9Sstevel@tonic-gate 
237210db224Sericheng uint_t
aggr_grp_count(void)238210db224Sericheng aggr_grp_count(void)
239210db224Sericheng {
240210db224Sericheng 	uint_t	count;
241210db224Sericheng 
242210db224Sericheng 	rw_enter(&aggr_grp_lock, RW_READER);
243210db224Sericheng 	count = aggr_grp_cnt;
244210db224Sericheng 	rw_exit(&aggr_grp_lock);
245210db224Sericheng 	return (count);
246210db224Sericheng }
247210db224Sericheng 
2487c478bd9Sstevel@tonic-gate /*
249da14cebeSEric Cheng  * Since both aggr_port_notify_cb() and aggr_port_timer_thread() functions
250da14cebeSEric Cheng  * requires the mac perimeter, this function holds a reference of the aggr
251da14cebeSEric Cheng  * and aggr won't call mac_unregister() until this reference drops to 0.
252da14cebeSEric Cheng  */
253da14cebeSEric Cheng void
aggr_grp_port_hold(aggr_port_t * port)254da14cebeSEric Cheng aggr_grp_port_hold(aggr_port_t *port)
255da14cebeSEric Cheng {
256da14cebeSEric Cheng 	aggr_grp_t	*grp = port->lp_grp;
257da14cebeSEric Cheng 
258da14cebeSEric Cheng 	AGGR_PORT_REFHOLD(port);
259da14cebeSEric Cheng 	mutex_enter(&grp->lg_port_lock);
260da14cebeSEric Cheng 	grp->lg_port_ref++;
261da14cebeSEric Cheng 	mutex_exit(&grp->lg_port_lock);
262da14cebeSEric Cheng }
263da14cebeSEric Cheng 
264da14cebeSEric Cheng /*
265da14cebeSEric Cheng  * Release the reference of the grp and inform aggr_grp_delete() calling
266da14cebeSEric Cheng  * mac_unregister() is now safe.
267da14cebeSEric Cheng  */
268da14cebeSEric Cheng void
aggr_grp_port_rele(aggr_port_t * port)269da14cebeSEric Cheng aggr_grp_port_rele(aggr_port_t *port)
270da14cebeSEric Cheng {
271da14cebeSEric Cheng 	aggr_grp_t	*grp = port->lp_grp;
272da14cebeSEric Cheng 
273da14cebeSEric Cheng 	mutex_enter(&grp->lg_port_lock);
274da14cebeSEric Cheng 	if (--grp->lg_port_ref == 0)
275da14cebeSEric Cheng 		cv_signal(&grp->lg_port_cv);
276da14cebeSEric Cheng 	mutex_exit(&grp->lg_port_lock);
277da14cebeSEric Cheng 	AGGR_PORT_REFRELE(port);
278da14cebeSEric Cheng }
279da14cebeSEric Cheng 
280da14cebeSEric Cheng /*
281da14cebeSEric Cheng  * Wait for the port's lacp timer thread and the port's notification callback
282da14cebeSEric Cheng  * to exit.
283da14cebeSEric Cheng  */
284da14cebeSEric Cheng void
aggr_grp_port_wait(aggr_grp_t * grp)285da14cebeSEric Cheng aggr_grp_port_wait(aggr_grp_t *grp)
286da14cebeSEric Cheng {
287da14cebeSEric Cheng 	mutex_enter(&grp->lg_port_lock);
288da14cebeSEric Cheng 	if (grp->lg_port_ref != 0)
289da14cebeSEric Cheng 		cv_wait(&grp->lg_port_cv, &grp->lg_port_lock);
290da14cebeSEric Cheng 	mutex_exit(&grp->lg_port_lock);
291da14cebeSEric Cheng }
292da14cebeSEric Cheng 
293da14cebeSEric Cheng /*
2947c478bd9Sstevel@tonic-gate  * Attach a port to a link aggregation group.
2957c478bd9Sstevel@tonic-gate  *
2967c478bd9Sstevel@tonic-gate  * A port is attached to a link aggregation group once its speed
2977c478bd9Sstevel@tonic-gate  * and link state have been verified.
2987c478bd9Sstevel@tonic-gate  *
2997c478bd9Sstevel@tonic-gate  * Returns B_TRUE if the group link state or speed has changed. If
3007c478bd9Sstevel@tonic-gate  * it's the case, the caller must notify the MAC layer via a call
3017c478bd9Sstevel@tonic-gate  * to mac_link().
3027c478bd9Sstevel@tonic-gate  */
3037c478bd9Sstevel@tonic-gate boolean_t
aggr_grp_attach_port(aggr_grp_t * grp,aggr_port_t * port)3047c478bd9Sstevel@tonic-gate aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port)
3057c478bd9Sstevel@tonic-gate {
3064deae11aSyz147064 	boolean_t link_state_changed = B_FALSE;
3077c478bd9Sstevel@tonic-gate 
308da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
309da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
3107c478bd9Sstevel@tonic-gate 
3117c478bd9Sstevel@tonic-gate 	if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
3127c478bd9Sstevel@tonic-gate 		return (B_FALSE);
3137c478bd9Sstevel@tonic-gate 
3147c478bd9Sstevel@tonic-gate 	/*
3157c478bd9Sstevel@tonic-gate 	 * Validate the MAC port link speed and update the group
3167c478bd9Sstevel@tonic-gate 	 * link speed if needed.
3177c478bd9Sstevel@tonic-gate 	 */
3187c478bd9Sstevel@tonic-gate 	if (port->lp_ifspeed == 0 ||
3197c478bd9Sstevel@tonic-gate 	    port->lp_link_state != LINK_STATE_UP ||
3207c478bd9Sstevel@tonic-gate 	    port->lp_link_duplex != LINK_DUPLEX_FULL) {
3217c478bd9Sstevel@tonic-gate 		/*
3227c478bd9Sstevel@tonic-gate 		 * Can't attach a MAC port with unknown link speed,
3237c478bd9Sstevel@tonic-gate 		 * down link, or not in full duplex mode.
3247c478bd9Sstevel@tonic-gate 		 */
3257c478bd9Sstevel@tonic-gate 		return (B_FALSE);
3267c478bd9Sstevel@tonic-gate 	}
3277c478bd9Sstevel@tonic-gate 
3287c478bd9Sstevel@tonic-gate 	if (grp->lg_ifspeed == 0) {
3297c478bd9Sstevel@tonic-gate 		/*
3307c478bd9Sstevel@tonic-gate 		 * The group inherits the speed of the first link being
3317c478bd9Sstevel@tonic-gate 		 * attached.
3327c478bd9Sstevel@tonic-gate 		 */
3337c478bd9Sstevel@tonic-gate 		grp->lg_ifspeed = port->lp_ifspeed;
3344deae11aSyz147064 		link_state_changed = B_TRUE;
3357c478bd9Sstevel@tonic-gate 	} else if (grp->lg_ifspeed != port->lp_ifspeed) {
3367c478bd9Sstevel@tonic-gate 		/*
3377c478bd9Sstevel@tonic-gate 		 * The link speed of the MAC port must be the same as
3387c478bd9Sstevel@tonic-gate 		 * the group link speed, as per 802.3ad. Since it is
3397c478bd9Sstevel@tonic-gate 		 * not, the attach is cancelled.
3407c478bd9Sstevel@tonic-gate 		 */
3417c478bd9Sstevel@tonic-gate 		return (B_FALSE);
3427c478bd9Sstevel@tonic-gate 	}
3437c478bd9Sstevel@tonic-gate 
3447c478bd9Sstevel@tonic-gate 	grp->lg_nattached_ports++;
3457c478bd9Sstevel@tonic-gate 
3467c478bd9Sstevel@tonic-gate 	/*
3477c478bd9Sstevel@tonic-gate 	 * Update the group link state.
3487c478bd9Sstevel@tonic-gate 	 */
3497c478bd9Sstevel@tonic-gate 	if (grp->lg_link_state != LINK_STATE_UP) {
3507c478bd9Sstevel@tonic-gate 		grp->lg_link_state = LINK_STATE_UP;
3517c478bd9Sstevel@tonic-gate 		grp->lg_link_duplex = LINK_DUPLEX_FULL;
3524deae11aSyz147064 		link_state_changed = B_TRUE;
3537c478bd9Sstevel@tonic-gate 	}
3547c478bd9Sstevel@tonic-gate 
3557c478bd9Sstevel@tonic-gate 	/*
3567c478bd9Sstevel@tonic-gate 	 * Update port's state.
3577c478bd9Sstevel@tonic-gate 	 */
3587c478bd9Sstevel@tonic-gate 	port->lp_state = AGGR_PORT_STATE_ATTACHED;
3597c478bd9Sstevel@tonic-gate 
360ae6aa22aSVenugopal Iyer 	aggr_grp_multicst_port(port, B_TRUE);
361ae6aa22aSVenugopal Iyer 
3627c478bd9Sstevel@tonic-gate 	/*
363490ed22dSyz147064 	 * Set port's receive callback
364490ed22dSyz147064 	 */
365da14cebeSEric Cheng 	mac_rx_set(port->lp_mch, aggr_recv_cb, port);
366490ed22dSyz147064 
367490ed22dSyz147064 	/*
3687c478bd9Sstevel@tonic-gate 	 * If LACP is OFF, the port can be used to send data as soon
3697c478bd9Sstevel@tonic-gate 	 * as its link is up and verified to be compatible with the
3707c478bd9Sstevel@tonic-gate 	 * aggregation.
3717c478bd9Sstevel@tonic-gate 	 *
3727c478bd9Sstevel@tonic-gate 	 * If LACP is active or passive, notify the LACP subsystem, which
3737c478bd9Sstevel@tonic-gate 	 * will enable sending on the port following the LACP protocol.
3747c478bd9Sstevel@tonic-gate 	 */
3757c478bd9Sstevel@tonic-gate 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
3767c478bd9Sstevel@tonic-gate 		aggr_send_port_enable(port);
3777c478bd9Sstevel@tonic-gate 	else
3787c478bd9Sstevel@tonic-gate 		aggr_lacp_port_attached(port);
3797c478bd9Sstevel@tonic-gate 
3804deae11aSyz147064 	return (link_state_changed);
3817c478bd9Sstevel@tonic-gate }
3827c478bd9Sstevel@tonic-gate 
3837c478bd9Sstevel@tonic-gate boolean_t
aggr_grp_detach_port(aggr_grp_t * grp,aggr_port_t * port)384da14cebeSEric Cheng aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port)
3857c478bd9Sstevel@tonic-gate {
3864deae11aSyz147064 	boolean_t link_state_changed = B_FALSE;
3877c478bd9Sstevel@tonic-gate 
388da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
389da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
3907c478bd9Sstevel@tonic-gate 
391da14cebeSEric Cheng 	/* update state */
3927c478bd9Sstevel@tonic-gate 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
3937c478bd9Sstevel@tonic-gate 		return (B_FALSE);
394490ed22dSyz147064 
395da14cebeSEric Cheng 	mac_rx_clear(port->lp_mch);
3967c478bd9Sstevel@tonic-gate 
3977c478bd9Sstevel@tonic-gate 	aggr_grp_multicst_port(port, B_FALSE);
3987c478bd9Sstevel@tonic-gate 
3997c478bd9Sstevel@tonic-gate 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
4007c478bd9Sstevel@tonic-gate 		aggr_send_port_disable(port);
401da14cebeSEric Cheng 	else
4027c478bd9Sstevel@tonic-gate 		aggr_lacp_port_detached(port);
4037c478bd9Sstevel@tonic-gate 
40495c1c84bSRamesh Kumar Katla 	port->lp_state = AGGR_PORT_STATE_STANDBY;
405da14cebeSEric Cheng 
4067c478bd9Sstevel@tonic-gate 	grp->lg_nattached_ports--;
4077c478bd9Sstevel@tonic-gate 	if (grp->lg_nattached_ports == 0) {
4087c478bd9Sstevel@tonic-gate 		/* the last attached MAC port of the group is being detached */
4097c478bd9Sstevel@tonic-gate 		grp->lg_ifspeed = 0;
4107c478bd9Sstevel@tonic-gate 		grp->lg_link_state = LINK_STATE_DOWN;
4117c478bd9Sstevel@tonic-gate 		grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
4124deae11aSyz147064 		link_state_changed = B_TRUE;
4137c478bd9Sstevel@tonic-gate 	}
4147c478bd9Sstevel@tonic-gate 
4154deae11aSyz147064 	return (link_state_changed);
4167c478bd9Sstevel@tonic-gate }
4177c478bd9Sstevel@tonic-gate 
4187c478bd9Sstevel@tonic-gate /*
4197c478bd9Sstevel@tonic-gate  * Update the MAC addresses of the constituent ports of the specified
4207c478bd9Sstevel@tonic-gate  * group. This function is invoked:
4217c478bd9Sstevel@tonic-gate  * - after creating a new aggregation group.
4227c478bd9Sstevel@tonic-gate  * - after adding new ports to an aggregation group.
4237c478bd9Sstevel@tonic-gate  * - after removing a port from a group when the MAC address of
4247c478bd9Sstevel@tonic-gate  *   that port was used for the MAC address of the group.
4257c478bd9Sstevel@tonic-gate  * - after the MAC address of a port changed when the MAC address
4267c478bd9Sstevel@tonic-gate  *   of that port was used for the MAC address of the group.
4274deae11aSyz147064  *
4284deae11aSyz147064  * Return true if the link state of the aggregation changed, for example
4294deae11aSyz147064  * as a result of a failure changing the MAC address of one of the
4304deae11aSyz147064  * constituent ports.
4317c478bd9Sstevel@tonic-gate  */
4324deae11aSyz147064 boolean_t
aggr_grp_update_ports_mac(aggr_grp_t * grp)4337c478bd9Sstevel@tonic-gate aggr_grp_update_ports_mac(aggr_grp_t *grp)
4347c478bd9Sstevel@tonic-gate {
4357c478bd9Sstevel@tonic-gate 	aggr_port_t *cport;
4364deae11aSyz147064 	boolean_t link_state_changed = B_FALSE;
437da14cebeSEric Cheng 	mac_perim_handle_t mph;
4387c478bd9Sstevel@tonic-gate 
439da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
4404deae11aSyz147064 
4417c478bd9Sstevel@tonic-gate 	for (cport = grp->lg_ports; cport != NULL;
4427c478bd9Sstevel@tonic-gate 	    cport = cport->lp_next) {
443da14cebeSEric Cheng 		mac_perim_enter_by_mh(cport->lp_mh, &mph);
444da14cebeSEric Cheng 		if (aggr_port_unicst(cport) != 0) {
445da14cebeSEric Cheng 			if (aggr_grp_detach_port(grp, cport))
446392b1d6eSyz147064 				link_state_changed = B_TRUE;
4474deae11aSyz147064 		} else {
4484deae11aSyz147064 			/*
4494deae11aSyz147064 			 * If a port was detached because of a previous
4504deae11aSyz147064 			 * failure changing the MAC address, the port is
4514deae11aSyz147064 			 * reattached when it successfully changes the MAC
4524deae11aSyz147064 			 * address now, and this might cause the link state
4534deae11aSyz147064 			 * of the aggregation to change.
4544deae11aSyz147064 			 */
455392b1d6eSyz147064 			if (aggr_grp_attach_port(grp, cport))
456392b1d6eSyz147064 				link_state_changed = B_TRUE;
4577c478bd9Sstevel@tonic-gate 		}
458da14cebeSEric Cheng 		mac_perim_exit(mph);
4594deae11aSyz147064 	}
4604deae11aSyz147064 	return (link_state_changed);
4617c478bd9Sstevel@tonic-gate }
4627c478bd9Sstevel@tonic-gate 
4637c478bd9Sstevel@tonic-gate /*
4647c478bd9Sstevel@tonic-gate  * Invoked when the MAC address of a port has changed. If the port's
4654deae11aSyz147064  * MAC address was used for the group MAC address, set mac_addr_changedp
4664deae11aSyz147064  * to B_TRUE to indicate to the caller that it should send a MAC_NOTE_UNICST
4674deae11aSyz147064  * notification. If the link state changes due to detach/attach of
4684deae11aSyz147064  * the constituent port, set link_state_changedp to B_TRUE to indicate
4694deae11aSyz147064  * to the caller that it should send a MAC_NOTE_LINK notification. In both
4704deae11aSyz147064  * cases, it is the responsibility of the caller to invoke notification
4714deae11aSyz147064  * functions after releasing the the port lock.
4727c478bd9Sstevel@tonic-gate  */
4734deae11aSyz147064 void
aggr_grp_port_mac_changed(aggr_grp_t * grp,aggr_port_t * port,boolean_t * mac_addr_changedp,boolean_t * link_state_changedp)4744deae11aSyz147064 aggr_grp_port_mac_changed(aggr_grp_t *grp, aggr_port_t *port,
4754deae11aSyz147064     boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
4767c478bd9Sstevel@tonic-gate {
477da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
478da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
4794deae11aSyz147064 	ASSERT(mac_addr_changedp != NULL);
4804deae11aSyz147064 	ASSERT(link_state_changedp != NULL);
4814deae11aSyz147064 
4824deae11aSyz147064 	*mac_addr_changedp = B_FALSE;
4834deae11aSyz147064 	*link_state_changedp = B_FALSE;
4847c478bd9Sstevel@tonic-gate 
4857c478bd9Sstevel@tonic-gate 	if (grp->lg_addr_fixed) {
4867c478bd9Sstevel@tonic-gate 		/*
4877c478bd9Sstevel@tonic-gate 		 * The group is using a fixed MAC address or an automatic
4887c478bd9Sstevel@tonic-gate 		 * MAC address has not been set.
4897c478bd9Sstevel@tonic-gate 		 */
4904deae11aSyz147064 		return;
4917c478bd9Sstevel@tonic-gate 	}
4927c478bd9Sstevel@tonic-gate 
4937c478bd9Sstevel@tonic-gate 	if (grp->lg_mac_addr_port == port) {
4947c478bd9Sstevel@tonic-gate 		/*
4957c478bd9Sstevel@tonic-gate 		 * The MAC address of the port was assigned to the group
4967c478bd9Sstevel@tonic-gate 		 * MAC address. Update the group MAC address.
4977c478bd9Sstevel@tonic-gate 		 */
4987c478bd9Sstevel@tonic-gate 		bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
4994deae11aSyz147064 		*mac_addr_changedp = B_TRUE;
5007c478bd9Sstevel@tonic-gate 	} else {
5017c478bd9Sstevel@tonic-gate 		/*
5027c478bd9Sstevel@tonic-gate 		 * Update the actual port MAC address to the MAC address
5037c478bd9Sstevel@tonic-gate 		 * of the group.
5047c478bd9Sstevel@tonic-gate 		 */
505da14cebeSEric Cheng 		if (aggr_port_unicst(port) != 0) {
506da14cebeSEric Cheng 			*link_state_changedp = aggr_grp_detach_port(grp, port);
5074deae11aSyz147064 		} else {
5084deae11aSyz147064 			/*
5094deae11aSyz147064 			 * If a port was detached because of a previous
5104deae11aSyz147064 			 * failure changing the MAC address, the port is
5114deae11aSyz147064 			 * reattached when it successfully changes the MAC
5124deae11aSyz147064 			 * address now, and this might cause the link state
5134deae11aSyz147064 			 * of the aggregation to change.
5144deae11aSyz147064 			 */
5154deae11aSyz147064 			*link_state_changedp = aggr_grp_attach_port(grp, port);
5167c478bd9Sstevel@tonic-gate 		}
5174deae11aSyz147064 	}
5187c478bd9Sstevel@tonic-gate }
5197c478bd9Sstevel@tonic-gate 
5207c478bd9Sstevel@tonic-gate /*
5217c478bd9Sstevel@tonic-gate  * Add a port to a link aggregation group.
5227c478bd9Sstevel@tonic-gate  */
5237c478bd9Sstevel@tonic-gate static int
aggr_grp_add_port(aggr_grp_t * grp,datalink_id_t port_linkid,boolean_t force,aggr_port_t ** pp)524da14cebeSEric Cheng aggr_grp_add_port(aggr_grp_t *grp, datalink_id_t port_linkid, boolean_t force,
525d62bc4baSyz147064     aggr_port_t **pp)
5267c478bd9Sstevel@tonic-gate {
5277c478bd9Sstevel@tonic-gate 	aggr_port_t *port, **cport;
528da14cebeSEric Cheng 	mac_perim_handle_t mph;
5292b24ab6bSSebastien Roy 	zoneid_t port_zoneid = ALL_ZONES;
5307c478bd9Sstevel@tonic-gate 	int err;
5317c478bd9Sstevel@tonic-gate 
5322b24ab6bSSebastien Roy 	/* The port must be int the same zone as the aggregation. */
5332b24ab6bSSebastien Roy 	if (zone_check_datalink(&port_zoneid, port_linkid) != 0)
5342b24ab6bSSebastien Roy 		port_zoneid = GLOBAL_ZONEID;
5352b24ab6bSSebastien Roy 	if (grp->lg_zoneid != port_zoneid)
5362b24ab6bSSebastien Roy 		return (EBUSY);
5372b24ab6bSSebastien Roy 
538da14cebeSEric Cheng 	/*
539da14cebeSEric Cheng 	 * lg_mh could be NULL when the function is called during the creation
540da14cebeSEric Cheng 	 * of the aggregation.
541da14cebeSEric Cheng 	 */
542da14cebeSEric Cheng 	ASSERT(grp->lg_mh == NULL || MAC_PERIM_HELD(grp->lg_mh));
5437c478bd9Sstevel@tonic-gate 
5447c478bd9Sstevel@tonic-gate 	/* create new port */
545da14cebeSEric Cheng 	err = aggr_port_create(grp, port_linkid, force, &port);
5467c478bd9Sstevel@tonic-gate 	if (err != 0)
5477c478bd9Sstevel@tonic-gate 		return (err);
5487c478bd9Sstevel@tonic-gate 
549da14cebeSEric Cheng 	mac_perim_enter_by_mh(port->lp_mh, &mph);
5507c478bd9Sstevel@tonic-gate 
5517c478bd9Sstevel@tonic-gate 	/* add port to list of group constituent ports */
5527c478bd9Sstevel@tonic-gate 	cport = &grp->lg_ports;
5537c478bd9Sstevel@tonic-gate 	while (*cport != NULL)
5547c478bd9Sstevel@tonic-gate 		cport = &((*cport)->lp_next);
5557c478bd9Sstevel@tonic-gate 	*cport = port;
5567c478bd9Sstevel@tonic-gate 
5577c478bd9Sstevel@tonic-gate 	/*
5587c478bd9Sstevel@tonic-gate 	 * Back reference to the group it is member of. A port always
5597c478bd9Sstevel@tonic-gate 	 * holds a reference to its group to ensure that the back
5607c478bd9Sstevel@tonic-gate 	 * reference is always valid.
5617c478bd9Sstevel@tonic-gate 	 */
5627c478bd9Sstevel@tonic-gate 	port->lp_grp = grp;
5637c478bd9Sstevel@tonic-gate 	AGGR_GRP_REFHOLD(grp);
5647c478bd9Sstevel@tonic-gate 	grp->lg_nports++;
5657c478bd9Sstevel@tonic-gate 
5667c478bd9Sstevel@tonic-gate 	aggr_lacp_init_port(port);
567da14cebeSEric Cheng 	mac_perim_exit(mph);
5687c478bd9Sstevel@tonic-gate 
5697c478bd9Sstevel@tonic-gate 	if (pp != NULL)
5707c478bd9Sstevel@tonic-gate 		*pp = port;
5717c478bd9Sstevel@tonic-gate 
5727c478bd9Sstevel@tonic-gate 	return (0);
5737c478bd9Sstevel@tonic-gate }
5747c478bd9Sstevel@tonic-gate 
5757c478bd9Sstevel@tonic-gate /*
576*f8bfcf82SRobert Mustacchi  * This is called in response to either our LACP state machine or a MAC
577*f8bfcf82SRobert Mustacchi  * notification that the link has gone down via aggr_send_port_disable(). At
578*f8bfcf82SRobert Mustacchi  * this point, we may need to update our default ring. To that end, we go
579*f8bfcf82SRobert Mustacchi  * through the set of ports (underlying datalinks in an aggregation) that are
580*f8bfcf82SRobert Mustacchi  * currently enabled to transmit data. If all our links have been disabled for
581*f8bfcf82SRobert Mustacchi  * transmit, then we don't do anything.
582*f8bfcf82SRobert Mustacchi  *
583*f8bfcf82SRobert Mustacchi  * Note, because we only have a single TX group, we don't have to worry about
584*f8bfcf82SRobert Mustacchi  * the rings moving between groups and the chance that mac will reassign it
585*f8bfcf82SRobert Mustacchi  * unless someone removes a port, at which point, we play it safe and call this
586*f8bfcf82SRobert Mustacchi  * again.
587*f8bfcf82SRobert Mustacchi  */
588*f8bfcf82SRobert Mustacchi void
aggr_grp_update_default(aggr_grp_t * grp)589*f8bfcf82SRobert Mustacchi aggr_grp_update_default(aggr_grp_t *grp)
590*f8bfcf82SRobert Mustacchi {
591*f8bfcf82SRobert Mustacchi 	aggr_port_t *port;
592*f8bfcf82SRobert Mustacchi 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
593*f8bfcf82SRobert Mustacchi 
594*f8bfcf82SRobert Mustacchi 	rw_enter(&grp->lg_tx_lock, RW_WRITER);
595*f8bfcf82SRobert Mustacchi 
596*f8bfcf82SRobert Mustacchi 	if (grp->lg_ntx_ports == 0) {
597*f8bfcf82SRobert Mustacchi 		rw_exit(&grp->lg_tx_lock);
598*f8bfcf82SRobert Mustacchi 		return;
599*f8bfcf82SRobert Mustacchi 	}
600*f8bfcf82SRobert Mustacchi 
601*f8bfcf82SRobert Mustacchi 	port = grp->lg_tx_ports[0];
602*f8bfcf82SRobert Mustacchi 	ASSERT(port->lp_tx_ring_cnt > 0);
603*f8bfcf82SRobert Mustacchi 	mac_hwring_set_default(grp->lg_mh, port->lp_pseudo_tx_rings[0]);
604*f8bfcf82SRobert Mustacchi 	rw_exit(&grp->lg_tx_lock);
605*f8bfcf82SRobert Mustacchi }
606*f8bfcf82SRobert Mustacchi 
607*f8bfcf82SRobert Mustacchi /*
6080dc2366fSVenugopal Iyer  * Add a pseudo RX ring for the given HW ring handle.
609da14cebeSEric Cheng  */
610da14cebeSEric Cheng static int
aggr_add_pseudo_rx_ring(aggr_port_t * port,aggr_pseudo_rx_group_t * rx_grp,mac_ring_handle_t hw_rh)611da14cebeSEric Cheng aggr_add_pseudo_rx_ring(aggr_port_t *port,
612da14cebeSEric Cheng     aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh)
613da14cebeSEric Cheng {
614da14cebeSEric Cheng 	aggr_pseudo_rx_ring_t	*ring;
615da14cebeSEric Cheng 	int			err;
616da14cebeSEric Cheng 	int			j;
617da14cebeSEric Cheng 
618da14cebeSEric Cheng 	for (j = 0; j < MAX_RINGS_PER_GROUP; j++) {
619da14cebeSEric Cheng 		ring = rx_grp->arg_rings + j;
620da14cebeSEric Cheng 		if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE))
621da14cebeSEric Cheng 			break;
622da14cebeSEric Cheng 	}
623da14cebeSEric Cheng 
624da14cebeSEric Cheng 	/*
6250dc2366fSVenugopal Iyer 	 * No slot for this new RX ring.
626da14cebeSEric Cheng 	 */
627da14cebeSEric Cheng 	if (j == MAX_RINGS_PER_GROUP)
628da14cebeSEric Cheng 		return (EIO);
629da14cebeSEric Cheng 
630da14cebeSEric Cheng 	ring->arr_flags |= MAC_PSEUDO_RING_INUSE;
631da14cebeSEric Cheng 	ring->arr_hw_rh = hw_rh;
632da14cebeSEric Cheng 	ring->arr_port = port;
633da14cebeSEric Cheng 	rx_grp->arg_ring_cnt++;
634da14cebeSEric Cheng 
635da14cebeSEric Cheng 	/*
636da14cebeSEric Cheng 	 * The group is already registered, dynamically add a new ring to the
637da14cebeSEric Cheng 	 * mac group.
638da14cebeSEric Cheng 	 */
639da14cebeSEric Cheng 	if ((err = mac_group_add_ring(rx_grp->arg_gh, j)) != 0) {
640da14cebeSEric Cheng 		ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE;
641da14cebeSEric Cheng 		ring->arr_hw_rh = NULL;
642da14cebeSEric Cheng 		ring->arr_port = NULL;
643da14cebeSEric Cheng 		rx_grp->arg_ring_cnt--;
6440dc2366fSVenugopal Iyer 	} else {
6450dc2366fSVenugopal Iyer 		mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring,
6460dc2366fSVenugopal Iyer 		    mac_find_ring(rx_grp->arg_gh, j));
647da14cebeSEric Cheng 	}
648da14cebeSEric Cheng 	return (err);
649da14cebeSEric Cheng }
650da14cebeSEric Cheng 
651da14cebeSEric Cheng /*
6520dc2366fSVenugopal Iyer  * Remove the pseudo RX ring of the given HW ring handle.
653da14cebeSEric Cheng  */
654da14cebeSEric Cheng static void
aggr_rem_pseudo_rx_ring(aggr_pseudo_rx_group_t * rx_grp,mac_ring_handle_t hw_rh)655da14cebeSEric Cheng aggr_rem_pseudo_rx_ring(aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh)
656da14cebeSEric Cheng {
657da14cebeSEric Cheng 	aggr_pseudo_rx_ring_t	*ring;
658da14cebeSEric Cheng 	int			j;
659da14cebeSEric Cheng 
660da14cebeSEric Cheng 	for (j = 0; j < MAX_RINGS_PER_GROUP; j++) {
661da14cebeSEric Cheng 		ring = rx_grp->arg_rings + j;
662da14cebeSEric Cheng 		if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE) ||
663da14cebeSEric Cheng 		    ring->arr_hw_rh != hw_rh) {
664da14cebeSEric Cheng 			continue;
665da14cebeSEric Cheng 		}
666da14cebeSEric Cheng 
667da14cebeSEric Cheng 		mac_group_rem_ring(rx_grp->arg_gh, ring->arr_rh);
668da14cebeSEric Cheng 
669da14cebeSEric Cheng 		ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE;
670da14cebeSEric Cheng 		ring->arr_hw_rh = NULL;
671da14cebeSEric Cheng 		ring->arr_port = NULL;
672da14cebeSEric Cheng 		rx_grp->arg_ring_cnt--;
673da14cebeSEric Cheng 		mac_hwring_teardown(hw_rh);
674da14cebeSEric Cheng 		break;
675da14cebeSEric Cheng 	}
676da14cebeSEric Cheng }
677da14cebeSEric Cheng 
678da14cebeSEric Cheng /*
679da14cebeSEric Cheng  * This function is called to create pseudo rings over the hardware rings of
680da14cebeSEric Cheng  * the underlying device. Note that there is a 1:1 mapping between the pseudo
681da14cebeSEric Cheng  * RX rings of the aggr and the hardware rings of the underlying port.
682da14cebeSEric Cheng  */
683da14cebeSEric Cheng static int
aggr_add_pseudo_rx_group(aggr_port_t * port,aggr_pseudo_rx_group_t * rx_grp)684da14cebeSEric Cheng aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
685da14cebeSEric Cheng {
686da14cebeSEric Cheng 	aggr_grp_t		*grp = port->lp_grp;
687da14cebeSEric Cheng 	mac_ring_handle_t	hw_rh[MAX_RINGS_PER_GROUP];
688da14cebeSEric Cheng 	aggr_unicst_addr_t	*addr, *a;
689da14cebeSEric Cheng 	mac_perim_handle_t	pmph;
690da14cebeSEric Cheng 	int			hw_rh_cnt, i = 0, j;
691da14cebeSEric Cheng 	int			err = 0;
692da14cebeSEric Cheng 
693da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
694da14cebeSEric Cheng 	mac_perim_enter_by_mh(port->lp_mh, &pmph);
695da14cebeSEric Cheng 
696da14cebeSEric Cheng 	/*
697da14cebeSEric Cheng 	 * This function must be called after the aggr registers its mac
698da14cebeSEric Cheng 	 * and its RX group has been initialized.
699da14cebeSEric Cheng 	 */
700da14cebeSEric Cheng 	ASSERT(rx_grp->arg_gh != NULL);
701da14cebeSEric Cheng 
702da14cebeSEric Cheng 	/*
703da14cebeSEric Cheng 	 * Get the list the the underlying HW rings.
704da14cebeSEric Cheng 	 */
7050dc2366fSVenugopal Iyer 	hw_rh_cnt = mac_hwrings_get(port->lp_mch,
7060dc2366fSVenugopal Iyer 	    &port->lp_hwgh, hw_rh, MAC_RING_TYPE_RX);
707da14cebeSEric Cheng 
708da14cebeSEric Cheng 	if (port->lp_hwgh != NULL) {
709da14cebeSEric Cheng 		/*
710da14cebeSEric Cheng 		 * Quiesce the HW ring and the mac srs on the ring. Note
711da14cebeSEric Cheng 		 * that the HW ring will be restarted when the pseudo ring
712da14cebeSEric Cheng 		 * is started. At that time all the packets will be
713da14cebeSEric Cheng 		 * directly passed up to the pseudo RX ring and handled
714da14cebeSEric Cheng 		 * by mac srs created over the pseudo RX ring.
715da14cebeSEric Cheng 		 */
716da14cebeSEric Cheng 		mac_rx_client_quiesce(port->lp_mch);
717da14cebeSEric Cheng 		mac_srs_perm_quiesce(port->lp_mch, B_TRUE);
718da14cebeSEric Cheng 	}
719da14cebeSEric Cheng 
720da14cebeSEric Cheng 	/*
721da14cebeSEric Cheng 	 * Add all the unicast addresses to the newly added port.
722da14cebeSEric Cheng 	 */
723da14cebeSEric Cheng 	for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) {
724da14cebeSEric Cheng 		if ((err = aggr_port_addmac(port, addr->aua_addr)) != 0)
725da14cebeSEric Cheng 			break;
726da14cebeSEric Cheng 	}
727da14cebeSEric Cheng 
728da14cebeSEric Cheng 	for (i = 0; err == 0 && i < hw_rh_cnt; i++)
729da14cebeSEric Cheng 		err = aggr_add_pseudo_rx_ring(port, rx_grp, hw_rh[i]);
730da14cebeSEric Cheng 
731da14cebeSEric Cheng 	if (err != 0) {
732da14cebeSEric Cheng 		for (j = 0; j < i; j++)
733da14cebeSEric Cheng 			aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[j]);
734da14cebeSEric Cheng 
735da14cebeSEric Cheng 		for (a = rx_grp->arg_macaddr; a != addr; a = a->aua_next)
736da14cebeSEric Cheng 			aggr_port_remmac(port, a->aua_addr);
737da14cebeSEric Cheng 
738da14cebeSEric Cheng 		if (port->lp_hwgh != NULL) {
739da14cebeSEric Cheng 			mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
740da14cebeSEric Cheng 			mac_rx_client_restart(port->lp_mch);
741da14cebeSEric Cheng 			port->lp_hwgh = NULL;
742da14cebeSEric Cheng 		}
743da14cebeSEric Cheng 	} else {
7440dc2366fSVenugopal Iyer 		port->lp_rx_grp_added = B_TRUE;
745da14cebeSEric Cheng 	}
746da14cebeSEric Cheng done:
747da14cebeSEric Cheng 	mac_perim_exit(pmph);
748da14cebeSEric Cheng 	return (err);
749da14cebeSEric Cheng }
750da14cebeSEric Cheng 
751da14cebeSEric Cheng /*
752da14cebeSEric Cheng  * This function is called by aggr to remove pseudo RX rings over the
753da14cebeSEric Cheng  * HW rings of the underlying port.
754da14cebeSEric Cheng  */
755da14cebeSEric Cheng static void
aggr_rem_pseudo_rx_group(aggr_port_t * port,aggr_pseudo_rx_group_t * rx_grp)756da14cebeSEric Cheng aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
757da14cebeSEric Cheng {
758da14cebeSEric Cheng 	aggr_grp_t		*grp = port->lp_grp;
759da14cebeSEric Cheng 	mac_ring_handle_t	hw_rh[MAX_RINGS_PER_GROUP];
760da14cebeSEric Cheng 	aggr_unicst_addr_t	*addr;
761da14cebeSEric Cheng 	mac_group_handle_t	hwgh;
762da14cebeSEric Cheng 	mac_perim_handle_t	pmph;
763da14cebeSEric Cheng 	int			hw_rh_cnt, i;
764da14cebeSEric Cheng 
765da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
766da14cebeSEric Cheng 	mac_perim_enter_by_mh(port->lp_mh, &pmph);
767da14cebeSEric Cheng 
7680dc2366fSVenugopal Iyer 	if (!port->lp_rx_grp_added)
769da14cebeSEric Cheng 		goto done;
770da14cebeSEric Cheng 
771da14cebeSEric Cheng 	ASSERT(rx_grp->arg_gh != NULL);
7720dc2366fSVenugopal Iyer 	hw_rh_cnt = mac_hwrings_get(port->lp_mch,
7730dc2366fSVenugopal Iyer 	    &hwgh, hw_rh, MAC_RING_TYPE_RX);
774da14cebeSEric Cheng 
775da14cebeSEric Cheng 	/*
776da14cebeSEric Cheng 	 * If hw_rh_cnt is 0, it means that the underlying port does not
777da14cebeSEric Cheng 	 * support RX rings. Directly return in this case.
778da14cebeSEric Cheng 	 */
779da14cebeSEric Cheng 	for (i = 0; i < hw_rh_cnt; i++)
780da14cebeSEric Cheng 		aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[i]);
781da14cebeSEric Cheng 
782da14cebeSEric Cheng 	for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next)
783da14cebeSEric Cheng 		aggr_port_remmac(port, addr->aua_addr);
784da14cebeSEric Cheng 
785da14cebeSEric Cheng 	if (port->lp_hwgh != NULL) {
786da14cebeSEric Cheng 		port->lp_hwgh = NULL;
787da14cebeSEric Cheng 
788da14cebeSEric Cheng 		/*
789da14cebeSEric Cheng 		 * First clear the permanent-quiesced flag of the RX srs then
790da14cebeSEric Cheng 		 * restart the HW ring and the mac srs on the ring. Note that
791da14cebeSEric Cheng 		 * the HW ring and associated SRS will soon been removed when
792da14cebeSEric Cheng 		 * the port is removed from the aggr.
793da14cebeSEric Cheng 		 */
794da14cebeSEric Cheng 		mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
795da14cebeSEric Cheng 		mac_rx_client_restart(port->lp_mch);
796da14cebeSEric Cheng 	}
797da14cebeSEric Cheng 
7980dc2366fSVenugopal Iyer 	port->lp_rx_grp_added = B_FALSE;
7990dc2366fSVenugopal Iyer done:
8000dc2366fSVenugopal Iyer 	mac_perim_exit(pmph);
8010dc2366fSVenugopal Iyer }
8020dc2366fSVenugopal Iyer 
8030dc2366fSVenugopal Iyer /*
8040dc2366fSVenugopal Iyer  * Add a pseudo TX ring for the given HW ring handle.
8050dc2366fSVenugopal Iyer  */
8060dc2366fSVenugopal Iyer static int
aggr_add_pseudo_tx_ring(aggr_port_t * port,aggr_pseudo_tx_group_t * tx_grp,mac_ring_handle_t hw_rh,mac_ring_handle_t * pseudo_rh)8070dc2366fSVenugopal Iyer aggr_add_pseudo_tx_ring(aggr_port_t *port,
8080dc2366fSVenugopal Iyer     aggr_pseudo_tx_group_t *tx_grp, mac_ring_handle_t hw_rh,
8090dc2366fSVenugopal Iyer     mac_ring_handle_t *pseudo_rh)
8100dc2366fSVenugopal Iyer {
8110dc2366fSVenugopal Iyer 	aggr_pseudo_tx_ring_t	*ring;
8120dc2366fSVenugopal Iyer 	int			err;
8130dc2366fSVenugopal Iyer 	int			i;
8140dc2366fSVenugopal Iyer 
8150dc2366fSVenugopal Iyer 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
8160dc2366fSVenugopal Iyer 	for (i = 0; i < MAX_RINGS_PER_GROUP; i++) {
8170dc2366fSVenugopal Iyer 		ring = tx_grp->atg_rings + i;
8180dc2366fSVenugopal Iyer 		if (!(ring->atr_flags & MAC_PSEUDO_RING_INUSE))
8190dc2366fSVenugopal Iyer 			break;
8200dc2366fSVenugopal Iyer 	}
8210dc2366fSVenugopal Iyer 	/*
8220dc2366fSVenugopal Iyer 	 * No slot for this new TX ring.
8230dc2366fSVenugopal Iyer 	 */
8240dc2366fSVenugopal Iyer 	if (i == MAX_RINGS_PER_GROUP)
8250dc2366fSVenugopal Iyer 		return (EIO);
8260dc2366fSVenugopal Iyer 	/*
8270dc2366fSVenugopal Iyer 	 * The following 4 statements needs to be done before
8280dc2366fSVenugopal Iyer 	 * calling mac_group_add_ring(). Otherwise it will
8290dc2366fSVenugopal Iyer 	 * result in an assertion failure in mac_init_ring().
8300dc2366fSVenugopal Iyer 	 */
8310dc2366fSVenugopal Iyer 	ring->atr_flags |= MAC_PSEUDO_RING_INUSE;
8320dc2366fSVenugopal Iyer 	ring->atr_hw_rh = hw_rh;
8330dc2366fSVenugopal Iyer 	ring->atr_port = port;
8340dc2366fSVenugopal Iyer 	tx_grp->atg_ring_cnt++;
8350dc2366fSVenugopal Iyer 
8360dc2366fSVenugopal Iyer 	/*
8370dc2366fSVenugopal Iyer 	 * The TX side has no concept of ring groups unlike RX groups.
8380dc2366fSVenugopal Iyer 	 * There is just a single group which stores all the TX rings.
8390dc2366fSVenugopal Iyer 	 * This group will be used to store aggr's pseudo TX rings.
8400dc2366fSVenugopal Iyer 	 */
8410dc2366fSVenugopal Iyer 	if ((err = mac_group_add_ring(tx_grp->atg_gh, i)) != 0) {
8420dc2366fSVenugopal Iyer 		ring->atr_flags &= ~MAC_PSEUDO_RING_INUSE;
8430dc2366fSVenugopal Iyer 		ring->atr_hw_rh = NULL;
8440dc2366fSVenugopal Iyer 		ring->atr_port = NULL;
8450dc2366fSVenugopal Iyer 		tx_grp->atg_ring_cnt--;
8460dc2366fSVenugopal Iyer 	} else {
8470dc2366fSVenugopal Iyer 		*pseudo_rh = mac_find_ring(tx_grp->atg_gh, i);
8480dc2366fSVenugopal Iyer 		if (hw_rh != NULL) {
8490dc2366fSVenugopal Iyer 			mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring,
8500dc2366fSVenugopal Iyer 			    mac_find_ring(tx_grp->atg_gh, i));
8510dc2366fSVenugopal Iyer 		}
8520dc2366fSVenugopal Iyer 	}
853*f8bfcf82SRobert Mustacchi 
8540dc2366fSVenugopal Iyer 	return (err);
8550dc2366fSVenugopal Iyer }
8560dc2366fSVenugopal Iyer 
8570dc2366fSVenugopal Iyer /*
8580dc2366fSVenugopal Iyer  * Remove the pseudo TX ring of the given HW ring handle.
8590dc2366fSVenugopal Iyer  */
8600dc2366fSVenugopal Iyer static void
aggr_rem_pseudo_tx_ring(aggr_pseudo_tx_group_t * tx_grp,mac_ring_handle_t pseudo_hw_rh)8610dc2366fSVenugopal Iyer aggr_rem_pseudo_tx_ring(aggr_pseudo_tx_group_t *tx_grp,
8620dc2366fSVenugopal Iyer     mac_ring_handle_t pseudo_hw_rh)
8630dc2366fSVenugopal Iyer {
8640dc2366fSVenugopal Iyer 	aggr_pseudo_tx_ring_t	*ring;
8650dc2366fSVenugopal Iyer 	int			i;
8660dc2366fSVenugopal Iyer 
8670dc2366fSVenugopal Iyer 	for (i = 0; i < MAX_RINGS_PER_GROUP; i++) {
8680dc2366fSVenugopal Iyer 		ring = tx_grp->atg_rings + i;
8690dc2366fSVenugopal Iyer 		if (ring->atr_rh != pseudo_hw_rh)
8700dc2366fSVenugopal Iyer 			continue;
8710dc2366fSVenugopal Iyer 
8720dc2366fSVenugopal Iyer 		ASSERT(ring->atr_flags & MAC_PSEUDO_RING_INUSE);
8730dc2366fSVenugopal Iyer 		mac_group_rem_ring(tx_grp->atg_gh, pseudo_hw_rh);
8740dc2366fSVenugopal Iyer 		ring->atr_flags &= ~MAC_PSEUDO_RING_INUSE;
8750dc2366fSVenugopal Iyer 		mac_hwring_teardown(ring->atr_hw_rh);
8760dc2366fSVenugopal Iyer 		ring->atr_hw_rh = NULL;
8770dc2366fSVenugopal Iyer 		ring->atr_port = NULL;
8780dc2366fSVenugopal Iyer 		tx_grp->atg_ring_cnt--;
8790dc2366fSVenugopal Iyer 		break;
8800dc2366fSVenugopal Iyer 	}
8810dc2366fSVenugopal Iyer }
8820dc2366fSVenugopal Iyer 
8830dc2366fSVenugopal Iyer /*
8840dc2366fSVenugopal Iyer  * This function is called to create pseudo rings over hardware rings of
8850dc2366fSVenugopal Iyer  * the underlying device. There is a 1:1 mapping between the pseudo TX
8860dc2366fSVenugopal Iyer  * rings of the aggr and the hardware rings of the underlying port.
8870dc2366fSVenugopal Iyer  */
8880dc2366fSVenugopal Iyer static int
aggr_add_pseudo_tx_group(aggr_port_t * port,aggr_pseudo_tx_group_t * tx_grp)8890dc2366fSVenugopal Iyer aggr_add_pseudo_tx_group(aggr_port_t *port, aggr_pseudo_tx_group_t *tx_grp)
8900dc2366fSVenugopal Iyer {
8910dc2366fSVenugopal Iyer 	aggr_grp_t		*grp = port->lp_grp;
8920dc2366fSVenugopal Iyer 	mac_ring_handle_t	hw_rh[MAX_RINGS_PER_GROUP], pseudo_rh;
8930dc2366fSVenugopal Iyer 	mac_perim_handle_t	pmph;
8940dc2366fSVenugopal Iyer 	int			hw_rh_cnt, i = 0, j;
8950dc2366fSVenugopal Iyer 	int			err = 0;
8960dc2366fSVenugopal Iyer 
8970dc2366fSVenugopal Iyer 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
8980dc2366fSVenugopal Iyer 	mac_perim_enter_by_mh(port->lp_mh, &pmph);
8990dc2366fSVenugopal Iyer 
9000dc2366fSVenugopal Iyer 	/*
9010dc2366fSVenugopal Iyer 	 * Get the list the the underlying HW rings.
9020dc2366fSVenugopal Iyer 	 */
9030dc2366fSVenugopal Iyer 	hw_rh_cnt = mac_hwrings_get(port->lp_mch,
9040dc2366fSVenugopal Iyer 	    NULL, hw_rh, MAC_RING_TYPE_TX);
9050dc2366fSVenugopal Iyer 
9060dc2366fSVenugopal Iyer 	/*
9070dc2366fSVenugopal Iyer 	 * Even if the underlying NIC does not have TX rings, we
9080dc2366fSVenugopal Iyer 	 * still make a psuedo TX ring for that NIC with NULL as
9090dc2366fSVenugopal Iyer 	 * the ring handle.
9100dc2366fSVenugopal Iyer 	 */
9110dc2366fSVenugopal Iyer 	if (hw_rh_cnt == 0)
9120dc2366fSVenugopal Iyer 		port->lp_tx_ring_cnt = 1;
9130dc2366fSVenugopal Iyer 	else
9140dc2366fSVenugopal Iyer 		port->lp_tx_ring_cnt = hw_rh_cnt;
9150dc2366fSVenugopal Iyer 
9160dc2366fSVenugopal Iyer 	port->lp_tx_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) *
9170dc2366fSVenugopal Iyer 	    port->lp_tx_ring_cnt), KM_SLEEP);
9180dc2366fSVenugopal Iyer 	port->lp_pseudo_tx_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) *
9190dc2366fSVenugopal Iyer 	    port->lp_tx_ring_cnt), KM_SLEEP);
9200dc2366fSVenugopal Iyer 
9210dc2366fSVenugopal Iyer 	if (hw_rh_cnt == 0) {
9220dc2366fSVenugopal Iyer 		if ((err = aggr_add_pseudo_tx_ring(port, tx_grp,
9230dc2366fSVenugopal Iyer 		    NULL, &pseudo_rh)) == 0) {
9240dc2366fSVenugopal Iyer 			port->lp_tx_rings[0] = NULL;
9250dc2366fSVenugopal Iyer 			port->lp_pseudo_tx_rings[0] = pseudo_rh;
9260dc2366fSVenugopal Iyer 		}
9270dc2366fSVenugopal Iyer 	} else {
9280dc2366fSVenugopal Iyer 		for (i = 0; err == 0 && i < hw_rh_cnt; i++) {
9290dc2366fSVenugopal Iyer 			err = aggr_add_pseudo_tx_ring(port,
9300dc2366fSVenugopal Iyer 			    tx_grp, hw_rh[i], &pseudo_rh);
9310dc2366fSVenugopal Iyer 			if (err != 0)
9320dc2366fSVenugopal Iyer 				break;
9330dc2366fSVenugopal Iyer 			port->lp_tx_rings[i] = hw_rh[i];
9340dc2366fSVenugopal Iyer 			port->lp_pseudo_tx_rings[i] = pseudo_rh;
9350dc2366fSVenugopal Iyer 		}
9360dc2366fSVenugopal Iyer 	}
9370dc2366fSVenugopal Iyer 
9380dc2366fSVenugopal Iyer 	if (err != 0) {
9390dc2366fSVenugopal Iyer 		if (hw_rh_cnt != 0) {
9400dc2366fSVenugopal Iyer 			for (j = 0; j < i; j++) {
9410dc2366fSVenugopal Iyer 				aggr_rem_pseudo_tx_ring(tx_grp,
9420dc2366fSVenugopal Iyer 				    port->lp_pseudo_tx_rings[j]);
9430dc2366fSVenugopal Iyer 			}
9440dc2366fSVenugopal Iyer 		}
9450dc2366fSVenugopal Iyer 		kmem_free(port->lp_tx_rings,
9460dc2366fSVenugopal Iyer 		    (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
9470dc2366fSVenugopal Iyer 		kmem_free(port->lp_pseudo_tx_rings,
9480dc2366fSVenugopal Iyer 		    (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
9490dc2366fSVenugopal Iyer 		port->lp_tx_ring_cnt = 0;
9500dc2366fSVenugopal Iyer 	} else {
9510dc2366fSVenugopal Iyer 		port->lp_tx_grp_added = B_TRUE;
9520dc2366fSVenugopal Iyer 		port->lp_tx_notify_mh = mac_client_tx_notify(port->lp_mch,
9530dc2366fSVenugopal Iyer 		    aggr_tx_ring_update, port);
9540dc2366fSVenugopal Iyer 	}
9550dc2366fSVenugopal Iyer 	mac_perim_exit(pmph);
956*f8bfcf82SRobert Mustacchi 	aggr_grp_update_default(grp);
9570dc2366fSVenugopal Iyer 	return (err);
9580dc2366fSVenugopal Iyer }
9590dc2366fSVenugopal Iyer 
9600dc2366fSVenugopal Iyer /*
9610dc2366fSVenugopal Iyer  * This function is called by aggr to remove pseudo TX rings over the
9620dc2366fSVenugopal Iyer  * HW rings of the underlying port.
9630dc2366fSVenugopal Iyer  */
9640dc2366fSVenugopal Iyer static void
aggr_rem_pseudo_tx_group(aggr_port_t * port,aggr_pseudo_tx_group_t * tx_grp)9650dc2366fSVenugopal Iyer aggr_rem_pseudo_tx_group(aggr_port_t *port, aggr_pseudo_tx_group_t *tx_grp)
9660dc2366fSVenugopal Iyer {
9670dc2366fSVenugopal Iyer 	aggr_grp_t		*grp = port->lp_grp;
9680dc2366fSVenugopal Iyer 	mac_perim_handle_t	pmph;
9690dc2366fSVenugopal Iyer 	int			i;
9700dc2366fSVenugopal Iyer 
9710dc2366fSVenugopal Iyer 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
9720dc2366fSVenugopal Iyer 	mac_perim_enter_by_mh(port->lp_mh, &pmph);
9730dc2366fSVenugopal Iyer 
9740dc2366fSVenugopal Iyer 	if (!port->lp_tx_grp_added)
9750dc2366fSVenugopal Iyer 		goto done;
9760dc2366fSVenugopal Iyer 
9770dc2366fSVenugopal Iyer 	ASSERT(tx_grp->atg_gh != NULL);
9780dc2366fSVenugopal Iyer 
9790dc2366fSVenugopal Iyer 	for (i = 0; i < port->lp_tx_ring_cnt; i++)
9800dc2366fSVenugopal Iyer 		aggr_rem_pseudo_tx_ring(tx_grp, port->lp_pseudo_tx_rings[i]);
9810dc2366fSVenugopal Iyer 
9820dc2366fSVenugopal Iyer 	kmem_free(port->lp_tx_rings,
9830dc2366fSVenugopal Iyer 	    (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
9840dc2366fSVenugopal Iyer 	kmem_free(port->lp_pseudo_tx_rings,
9850dc2366fSVenugopal Iyer 	    (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
9860dc2366fSVenugopal Iyer 
9870dc2366fSVenugopal Iyer 	port->lp_tx_ring_cnt = 0;
9880dc2366fSVenugopal Iyer 	(void) mac_client_tx_notify(port->lp_mch, NULL, port->lp_tx_notify_mh);
9890dc2366fSVenugopal Iyer 	port->lp_tx_grp_added = B_FALSE;
990*f8bfcf82SRobert Mustacchi 	aggr_grp_update_default(grp);
991da14cebeSEric Cheng done:
992da14cebeSEric Cheng 	mac_perim_exit(pmph);
993da14cebeSEric Cheng }
994da14cebeSEric Cheng 
995da14cebeSEric Cheng static int
aggr_pseudo_disable_intr(mac_intr_handle_t ih)996da14cebeSEric Cheng aggr_pseudo_disable_intr(mac_intr_handle_t ih)
997da14cebeSEric Cheng {
998da14cebeSEric Cheng 	aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih;
999da14cebeSEric Cheng 	return (mac_hwring_disable_intr(rr_ring->arr_hw_rh));
1000da14cebeSEric Cheng }
1001da14cebeSEric Cheng 
1002da14cebeSEric Cheng static int
aggr_pseudo_enable_intr(mac_intr_handle_t ih)1003da14cebeSEric Cheng aggr_pseudo_enable_intr(mac_intr_handle_t ih)
1004da14cebeSEric Cheng {
1005da14cebeSEric Cheng 	aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih;
1006da14cebeSEric Cheng 	return (mac_hwring_enable_intr(rr_ring->arr_hw_rh));
1007da14cebeSEric Cheng }
1008da14cebeSEric Cheng 
1009da14cebeSEric Cheng static int
aggr_pseudo_start_ring(mac_ring_driver_t arg,uint64_t mr_gen)1010da14cebeSEric Cheng aggr_pseudo_start_ring(mac_ring_driver_t arg, uint64_t mr_gen)
1011da14cebeSEric Cheng {
1012da14cebeSEric Cheng 	aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)arg;
1013da14cebeSEric Cheng 	int err;
1014da14cebeSEric Cheng 
1015da14cebeSEric Cheng 	err = mac_hwring_start(rr_ring->arr_hw_rh);
1016da14cebeSEric Cheng 	if (err == 0)
1017da14cebeSEric Cheng 		rr_ring->arr_gen = mr_gen;
1018da14cebeSEric Cheng 	return (err);
1019da14cebeSEric Cheng }
1020da14cebeSEric Cheng 
1021da14cebeSEric Cheng static void
aggr_pseudo_stop_ring(mac_ring_driver_t arg)1022da14cebeSEric Cheng aggr_pseudo_stop_ring(mac_ring_driver_t arg)
1023da14cebeSEric Cheng {
1024da14cebeSEric Cheng 	aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)arg;
1025da14cebeSEric Cheng 	mac_hwring_stop(rr_ring->arr_hw_rh);
1026da14cebeSEric Cheng }
1027da14cebeSEric Cheng 
1028da14cebeSEric Cheng /*
10297c478bd9Sstevel@tonic-gate  * Add one or more ports to an existing link aggregation group.
10307c478bd9Sstevel@tonic-gate  */
10317c478bd9Sstevel@tonic-gate int
aggr_grp_add_ports(datalink_id_t linkid,uint_t nports,boolean_t force,laioc_port_t * ports)1032d62bc4baSyz147064 aggr_grp_add_ports(datalink_id_t linkid, uint_t nports, boolean_t force,
1033d62bc4baSyz147064     laioc_port_t *ports)
10347c478bd9Sstevel@tonic-gate {
10357c478bd9Sstevel@tonic-gate 	int rc, i, nadded = 0;
10367c478bd9Sstevel@tonic-gate 	aggr_grp_t *grp = NULL;
10377c478bd9Sstevel@tonic-gate 	aggr_port_t *port;
1038c615009fSyz147064 	boolean_t link_state_changed = B_FALSE;
1039da14cebeSEric Cheng 	mac_perim_handle_t mph, pmph;
10407c478bd9Sstevel@tonic-gate 
1041d62bc4baSyz147064 	/* get group corresponding to linkid */
1042210db224Sericheng 	rw_enter(&aggr_grp_lock, RW_READER);
1043d62bc4baSyz147064 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1044210db224Sericheng 	    (mod_hash_val_t *)&grp) != 0) {
1045210db224Sericheng 		rw_exit(&aggr_grp_lock);
1046210db224Sericheng 		return (ENOENT);
10477c478bd9Sstevel@tonic-gate 	}
10487c478bd9Sstevel@tonic-gate 	AGGR_GRP_REFHOLD(grp);
10497c478bd9Sstevel@tonic-gate 
1050da14cebeSEric Cheng 	/*
1051da14cebeSEric Cheng 	 * Hold the perimeter so that the aggregation won't be destroyed.
1052da14cebeSEric Cheng 	 */
1053da14cebeSEric Cheng 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1054da14cebeSEric Cheng 	rw_exit(&aggr_grp_lock);
10557c478bd9Sstevel@tonic-gate 
10567c478bd9Sstevel@tonic-gate 	/* add the specified ports to group */
10577c478bd9Sstevel@tonic-gate 	for (i = 0; i < nports; i++) {
10587c478bd9Sstevel@tonic-gate 		/* add port to group */
1059d62bc4baSyz147064 		if ((rc = aggr_grp_add_port(grp, ports[i].lp_linkid,
1060d62bc4baSyz147064 		    force, &port)) != 0) {
10617c478bd9Sstevel@tonic-gate 			goto bail;
1062ba2e4443Sseb 		}
10637c478bd9Sstevel@tonic-gate 		ASSERT(port != NULL);
10647c478bd9Sstevel@tonic-gate 		nadded++;
10657c478bd9Sstevel@tonic-gate 
10667c478bd9Sstevel@tonic-gate 		/* check capabilities */
1067f4420ae7Snd99603 		if (!aggr_grp_capab_check(grp, port) ||
1068d62bc4baSyz147064 		    !aggr_grp_sdu_check(grp, port) ||
1069d62bc4baSyz147064 		    !aggr_grp_margin_check(grp, port)) {
10707c478bd9Sstevel@tonic-gate 			rc = ENOTSUP;
10717c478bd9Sstevel@tonic-gate 			goto bail;
10727c478bd9Sstevel@tonic-gate 		}
10737c478bd9Sstevel@tonic-gate 
1074da14cebeSEric Cheng 		/*
1075da14cebeSEric Cheng 		 * Create the pseudo ring for each HW ring of the underlying
1076da14cebeSEric Cheng 		 * port.
1077da14cebeSEric Cheng 		 */
10780dc2366fSVenugopal Iyer 		rc = aggr_add_pseudo_tx_group(port, &grp->lg_tx_group);
10790dc2366fSVenugopal Iyer 		if (rc != 0)
10800dc2366fSVenugopal Iyer 			goto bail;
1081da14cebeSEric Cheng 		rc = aggr_add_pseudo_rx_group(port, &grp->lg_rx_group);
1082da14cebeSEric Cheng 		if (rc != 0)
1083da14cebeSEric Cheng 			goto bail;
1084da14cebeSEric Cheng 
1085da14cebeSEric Cheng 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
1086da14cebeSEric Cheng 
1087da14cebeSEric Cheng 		/* set LACP mode */
1088da14cebeSEric Cheng 		aggr_port_lacp_set_mode(grp, port);
1089da14cebeSEric Cheng 
10907c478bd9Sstevel@tonic-gate 		/* start port if group has already been started */
10917c478bd9Sstevel@tonic-gate 		if (grp->lg_started) {
10927c478bd9Sstevel@tonic-gate 			rc = aggr_port_start(port);
10937c478bd9Sstevel@tonic-gate 			if (rc != 0) {
1094da14cebeSEric Cheng 				mac_perim_exit(pmph);
10957c478bd9Sstevel@tonic-gate 				goto bail;
10967c478bd9Sstevel@tonic-gate 			}
10977c478bd9Sstevel@tonic-gate 
1098da14cebeSEric Cheng 			/*
1099da14cebeSEric Cheng 			 * Turn on the promiscuous mode over the port when it
1100da14cebeSEric Cheng 			 * is requested to be turned on to receive the
1101da14cebeSEric Cheng 			 * non-primary address over a port, or the promiscous
1102da14cebeSEric Cheng 			 * mode is enabled over the aggr.
1103da14cebeSEric Cheng 			 */
1104da14cebeSEric Cheng 			if (grp->lg_promisc || port->lp_prom_addr != NULL) {
1105da14cebeSEric Cheng 				rc = aggr_port_promisc(port, B_TRUE);
11067c478bd9Sstevel@tonic-gate 				if (rc != 0) {
1107da14cebeSEric Cheng 					mac_perim_exit(pmph);
11087c478bd9Sstevel@tonic-gate 					goto bail;
11097c478bd9Sstevel@tonic-gate 				}
11107c478bd9Sstevel@tonic-gate 			}
1111da14cebeSEric Cheng 		}
1112da14cebeSEric Cheng 		mac_perim_exit(pmph);
1113c615009fSyz147064 
1114c615009fSyz147064 		/*
1115c615009fSyz147064 		 * Attach each port if necessary.
1116c615009fSyz147064 		 */
1117da14cebeSEric Cheng 		if (aggr_port_notify_link(grp, port))
1118392b1d6eSyz147064 			link_state_changed = B_TRUE;
1119da14cebeSEric Cheng 
1120da14cebeSEric Cheng 		/*
1121da14cebeSEric Cheng 		 * Initialize the callback functions for this port.
1122da14cebeSEric Cheng 		 */
1123da14cebeSEric Cheng 		aggr_port_init_callbacks(port);
11247c478bd9Sstevel@tonic-gate 	}
11257c478bd9Sstevel@tonic-gate 
11267c478bd9Sstevel@tonic-gate 	/* update the MAC address of the constituent ports */
1127392b1d6eSyz147064 	if (aggr_grp_update_ports_mac(grp))
1128392b1d6eSyz147064 		link_state_changed = B_TRUE;
1129c615009fSyz147064 
1130c615009fSyz147064 	if (link_state_changed)
1131ba2e4443Sseb 		mac_link_update(grp->lg_mh, grp->lg_link_state);
11327c478bd9Sstevel@tonic-gate 
11337c478bd9Sstevel@tonic-gate bail:
11347c478bd9Sstevel@tonic-gate 	if (rc != 0) {
11357c478bd9Sstevel@tonic-gate 		/* stop and remove ports that have been added */
1136da14cebeSEric Cheng 		for (i = 0; i < nadded; i++) {
1137d62bc4baSyz147064 			port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
11387c478bd9Sstevel@tonic-gate 			ASSERT(port != NULL);
11397c478bd9Sstevel@tonic-gate 			if (grp->lg_started) {
1140da14cebeSEric Cheng 				mac_perim_enter_by_mh(port->lp_mh, &pmph);
1141da14cebeSEric Cheng 				(void) aggr_port_promisc(port, B_FALSE);
11427c478bd9Sstevel@tonic-gate 				aggr_port_stop(port);
1143da14cebeSEric Cheng 				mac_perim_exit(pmph);
11447c478bd9Sstevel@tonic-gate 			}
11450dc2366fSVenugopal Iyer 			aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group);
1146da14cebeSEric Cheng 			aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
11474deae11aSyz147064 			(void) aggr_grp_rem_port(grp, port, NULL, NULL);
11487c478bd9Sstevel@tonic-gate 		}
11497c478bd9Sstevel@tonic-gate 	}
11507c478bd9Sstevel@tonic-gate 
1151da14cebeSEric Cheng 	mac_perim_exit(mph);
11527c478bd9Sstevel@tonic-gate 	AGGR_GRP_REFRELE(grp);
11537c478bd9Sstevel@tonic-gate 	return (rc);
11547c478bd9Sstevel@tonic-gate }
11557c478bd9Sstevel@tonic-gate 
1156da14cebeSEric Cheng static int
aggr_grp_modify_common(aggr_grp_t * grp,uint8_t update_mask,uint32_t policy,boolean_t mac_fixed,const uchar_t * mac_addr,aggr_lacp_mode_t lacp_mode,aggr_lacp_timer_t lacp_timer)1157da14cebeSEric Cheng aggr_grp_modify_common(aggr_grp_t *grp, uint8_t update_mask, uint32_t policy,
1158da14cebeSEric Cheng     boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode,
1159da14cebeSEric Cheng     aggr_lacp_timer_t lacp_timer)
11607c478bd9Sstevel@tonic-gate {
11617c478bd9Sstevel@tonic-gate 	boolean_t mac_addr_changed = B_FALSE;
11624deae11aSyz147064 	boolean_t link_state_changed = B_FALSE;
1163da14cebeSEric Cheng 	mac_perim_handle_t pmph;
11647c478bd9Sstevel@tonic-gate 
1165da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
11667c478bd9Sstevel@tonic-gate 
11677c478bd9Sstevel@tonic-gate 	/* validate fixed address if specified */
11687c478bd9Sstevel@tonic-gate 	if ((update_mask & AGGR_MODIFY_MAC) && mac_fixed &&
11697c478bd9Sstevel@tonic-gate 	    ((bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) ||
11707c478bd9Sstevel@tonic-gate 	    (mac_addr[0] & 0x01))) {
1171da14cebeSEric Cheng 		return (EINVAL);
11727c478bd9Sstevel@tonic-gate 	}
11737c478bd9Sstevel@tonic-gate 
11747c478bd9Sstevel@tonic-gate 	/* update policy if requested */
11757c478bd9Sstevel@tonic-gate 	if (update_mask & AGGR_MODIFY_POLICY)
11767c478bd9Sstevel@tonic-gate 		aggr_send_update_policy(grp, policy);
11777c478bd9Sstevel@tonic-gate 
11787c478bd9Sstevel@tonic-gate 	/* update unicast MAC address if requested */
11797c478bd9Sstevel@tonic-gate 	if (update_mask & AGGR_MODIFY_MAC) {
11807c478bd9Sstevel@tonic-gate 		if (mac_fixed) {
11817c478bd9Sstevel@tonic-gate 			/* user-supplied MAC address */
11827c478bd9Sstevel@tonic-gate 			grp->lg_mac_addr_port = NULL;
11837c478bd9Sstevel@tonic-gate 			if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) != 0) {
11847c478bd9Sstevel@tonic-gate 				bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
11857c478bd9Sstevel@tonic-gate 				mac_addr_changed = B_TRUE;
11867c478bd9Sstevel@tonic-gate 			}
11877c478bd9Sstevel@tonic-gate 		} else if (grp->lg_addr_fixed) {
11887c478bd9Sstevel@tonic-gate 			/* switch from user-supplied to automatic */
11897c478bd9Sstevel@tonic-gate 			aggr_port_t *port = grp->lg_ports;
11907c478bd9Sstevel@tonic-gate 
1191da14cebeSEric Cheng 			mac_perim_enter_by_mh(port->lp_mh, &pmph);
11927c478bd9Sstevel@tonic-gate 			bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
11937c478bd9Sstevel@tonic-gate 			grp->lg_mac_addr_port = port;
11947c478bd9Sstevel@tonic-gate 			mac_addr_changed = B_TRUE;
1195da14cebeSEric Cheng 			mac_perim_exit(pmph);
11967c478bd9Sstevel@tonic-gate 		}
11977c478bd9Sstevel@tonic-gate 		grp->lg_addr_fixed = mac_fixed;
11987c478bd9Sstevel@tonic-gate 	}
11997c478bd9Sstevel@tonic-gate 
12007c478bd9Sstevel@tonic-gate 	if (mac_addr_changed)
12014deae11aSyz147064 		link_state_changed = aggr_grp_update_ports_mac(grp);
12027c478bd9Sstevel@tonic-gate 
12037c478bd9Sstevel@tonic-gate 	if (update_mask & AGGR_MODIFY_LACP_MODE)
12047c478bd9Sstevel@tonic-gate 		aggr_lacp_update_mode(grp, lacp_mode);
12057c478bd9Sstevel@tonic-gate 
1206da14cebeSEric Cheng 	if (update_mask & AGGR_MODIFY_LACP_TIMER)
12077c478bd9Sstevel@tonic-gate 		aggr_lacp_update_timer(grp, lacp_timer);
12087c478bd9Sstevel@tonic-gate 
12094deae11aSyz147064 	if (link_state_changed)
1210ba2e4443Sseb 		mac_link_update(grp->lg_mh, grp->lg_link_state);
12114deae11aSyz147064 
1212da14cebeSEric Cheng 	if (mac_addr_changed)
1213da14cebeSEric Cheng 		mac_unicst_update(grp->lg_mh, grp->lg_addr);
1214da14cebeSEric Cheng 
1215da14cebeSEric Cheng 	return (0);
12164deae11aSyz147064 }
12174deae11aSyz147064 
1218da14cebeSEric Cheng /*
1219da14cebeSEric Cheng  * Update properties of an existing link aggregation group.
1220da14cebeSEric Cheng  */
1221da14cebeSEric Cheng int
aggr_grp_modify(datalink_id_t linkid,uint8_t update_mask,uint32_t policy,boolean_t mac_fixed,const uchar_t * mac_addr,aggr_lacp_mode_t lacp_mode,aggr_lacp_timer_t lacp_timer)1222da14cebeSEric Cheng aggr_grp_modify(datalink_id_t linkid, uint8_t update_mask, uint32_t policy,
1223da14cebeSEric Cheng     boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode,
1224da14cebeSEric Cheng     aggr_lacp_timer_t lacp_timer)
1225da14cebeSEric Cheng {
1226da14cebeSEric Cheng 	aggr_grp_t *grp = NULL;
1227da14cebeSEric Cheng 	mac_perim_handle_t mph;
1228da14cebeSEric Cheng 	int err;
1229da14cebeSEric Cheng 
1230da14cebeSEric Cheng 	/* get group corresponding to linkid */
1231da14cebeSEric Cheng 	rw_enter(&aggr_grp_lock, RW_READER);
1232da14cebeSEric Cheng 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1233da14cebeSEric Cheng 	    (mod_hash_val_t *)&grp) != 0) {
1234210db224Sericheng 		rw_exit(&aggr_grp_lock);
1235da14cebeSEric Cheng 		return (ENOENT);
12367c478bd9Sstevel@tonic-gate 	}
1237da14cebeSEric Cheng 	AGGR_GRP_REFHOLD(grp);
12387c478bd9Sstevel@tonic-gate 
1239da14cebeSEric Cheng 	/*
1240da14cebeSEric Cheng 	 * Hold the perimeter so that the aggregation won't be destroyed.
1241da14cebeSEric Cheng 	 */
1242da14cebeSEric Cheng 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1243da14cebeSEric Cheng 	rw_exit(&aggr_grp_lock);
1244da14cebeSEric Cheng 
1245da14cebeSEric Cheng 	err = aggr_grp_modify_common(grp, update_mask, policy, mac_fixed,
1246da14cebeSEric Cheng 	    mac_addr, lacp_mode, lacp_timer);
1247da14cebeSEric Cheng 
1248da14cebeSEric Cheng 	mac_perim_exit(mph);
12497c478bd9Sstevel@tonic-gate 	AGGR_GRP_REFRELE(grp);
1250da14cebeSEric Cheng 	return (err);
12517c478bd9Sstevel@tonic-gate }
12527c478bd9Sstevel@tonic-gate 
12537c478bd9Sstevel@tonic-gate /*
12547c478bd9Sstevel@tonic-gate  * Create a new link aggregation group upon request from administrator.
12557c478bd9Sstevel@tonic-gate  * Returns 0 on success, an errno on failure.
12567c478bd9Sstevel@tonic-gate  */
12577c478bd9Sstevel@tonic-gate int
aggr_grp_create(datalink_id_t linkid,uint32_t key,uint_t nports,laioc_port_t * ports,uint32_t policy,boolean_t mac_fixed,boolean_t force,uchar_t * mac_addr,aggr_lacp_mode_t lacp_mode,aggr_lacp_timer_t lacp_timer,cred_t * credp)1258d62bc4baSyz147064 aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports,
1259d62bc4baSyz147064     laioc_port_t *ports, uint32_t policy, boolean_t mac_fixed, boolean_t force,
12602b24ab6bSSebastien Roy     uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer,
12612b24ab6bSSebastien Roy     cred_t *credp)
12627c478bd9Sstevel@tonic-gate {
12637c478bd9Sstevel@tonic-gate 	aggr_grp_t *grp = NULL;
12647c478bd9Sstevel@tonic-gate 	aggr_port_t *port;
1265ba2e4443Sseb 	mac_register_t *mac;
12664deae11aSyz147064 	boolean_t link_state_changed;
1267da14cebeSEric Cheng 	mac_perim_handle_t mph;
12687c478bd9Sstevel@tonic-gate 	int err;
12697c478bd9Sstevel@tonic-gate 	int i;
12700dc2366fSVenugopal Iyer 	kt_did_t tid = 0;
12717c478bd9Sstevel@tonic-gate 
12727c478bd9Sstevel@tonic-gate 	/* need at least one port */
12737c478bd9Sstevel@tonic-gate 	if (nports == 0)
12747c478bd9Sstevel@tonic-gate 		return (EINVAL);
12757c478bd9Sstevel@tonic-gate 
1276210db224Sericheng 	rw_enter(&aggr_grp_lock, RW_WRITER);
12777c478bd9Sstevel@tonic-gate 
1278d62bc4baSyz147064 	/* does a group with the same linkid already exist? */
1279d62bc4baSyz147064 	err = mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1280210db224Sericheng 	    (mod_hash_val_t *)&grp);
1281210db224Sericheng 	if (err == 0) {
1282210db224Sericheng 		rw_exit(&aggr_grp_lock);
12837c478bd9Sstevel@tonic-gate 		return (EEXIST);
12847c478bd9Sstevel@tonic-gate 	}
12857c478bd9Sstevel@tonic-gate 
12867c478bd9Sstevel@tonic-gate 	grp = kmem_cache_alloc(aggr_grp_cache, KM_SLEEP);
12877c478bd9Sstevel@tonic-gate 
12887c478bd9Sstevel@tonic-gate 	grp->lg_refs = 1;
12894deae11aSyz147064 	grp->lg_closing = B_FALSE;
1290d62bc4baSyz147064 	grp->lg_force = force;
1291d62bc4baSyz147064 	grp->lg_linkid = linkid;
12922b24ab6bSSebastien Roy 	grp->lg_zoneid = crgetzoneid(credp);
12937c478bd9Sstevel@tonic-gate 	grp->lg_ifspeed = 0;
12947c478bd9Sstevel@tonic-gate 	grp->lg_link_state = LINK_STATE_UNKNOWN;
12957c478bd9Sstevel@tonic-gate 	grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
12967c478bd9Sstevel@tonic-gate 	grp->lg_started = B_FALSE;
12977c478bd9Sstevel@tonic-gate 	grp->lg_promisc = B_FALSE;
1298da14cebeSEric Cheng 	grp->lg_lacp_done = B_FALSE;
12990dc2366fSVenugopal Iyer 	grp->lg_tx_notify_done = B_FALSE;
1300da14cebeSEric Cheng 	grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
1301da14cebeSEric Cheng 	grp->lg_lacp_rx_thread = thread_create(NULL, 0,
1302da14cebeSEric Cheng 	    aggr_lacp_rx_thread, grp, 0, &p0, TS_RUN, minclsyspri);
13030dc2366fSVenugopal Iyer 	grp->lg_tx_notify_thread = thread_create(NULL, 0,
13040dc2366fSVenugopal Iyer 	    aggr_tx_notify_thread, grp, 0, &p0, TS_RUN, minclsyspri);
13050dc2366fSVenugopal Iyer 	grp->lg_tx_blocked_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) *
13060dc2366fSVenugopal Iyer 	    MAX_RINGS_PER_GROUP), KM_SLEEP);
13070dc2366fSVenugopal Iyer 	grp->lg_tx_blocked_cnt = 0;
1308da14cebeSEric Cheng 	bzero(&grp->lg_rx_group, sizeof (aggr_pseudo_rx_group_t));
13090dc2366fSVenugopal Iyer 	bzero(&grp->lg_tx_group, sizeof (aggr_pseudo_tx_group_t));
13107c478bd9Sstevel@tonic-gate 	aggr_lacp_init_grp(grp);
13117c478bd9Sstevel@tonic-gate 
13127c478bd9Sstevel@tonic-gate 	/* add MAC ports to group */
13137c478bd9Sstevel@tonic-gate 	grp->lg_ports = NULL;
13147c478bd9Sstevel@tonic-gate 	grp->lg_nports = 0;
13157c478bd9Sstevel@tonic-gate 	grp->lg_nattached_ports = 0;
13167c478bd9Sstevel@tonic-gate 	grp->lg_ntx_ports = 0;
13177c478bd9Sstevel@tonic-gate 
1318d62bc4baSyz147064 	/*
1319d62bc4baSyz147064 	 * If key is not specified by the user, allocate the key.
1320d62bc4baSyz147064 	 */
1321d62bc4baSyz147064 	if ((key == 0) && ((key = (uint32_t)id_alloc(key_ids)) == 0)) {
1322d62bc4baSyz147064 		err = ENOMEM;
1323d62bc4baSyz147064 		goto bail;
1324d62bc4baSyz147064 	}
1325d62bc4baSyz147064 	grp->lg_key = key;
1326d62bc4baSyz147064 
13277c478bd9Sstevel@tonic-gate 	for (i = 0; i < nports; i++) {
1328d62bc4baSyz147064 		err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, NULL);
13297c478bd9Sstevel@tonic-gate 		if (err != 0)
13307c478bd9Sstevel@tonic-gate 			goto bail;
13317c478bd9Sstevel@tonic-gate 	}
13327c478bd9Sstevel@tonic-gate 
13337c478bd9Sstevel@tonic-gate 	/*
13347c478bd9Sstevel@tonic-gate 	 * If no explicit MAC address was specified by the administrator,
13357c478bd9Sstevel@tonic-gate 	 * set it to the MAC address of the first port.
13367c478bd9Sstevel@tonic-gate 	 */
13377c478bd9Sstevel@tonic-gate 	grp->lg_addr_fixed = mac_fixed;
13387c478bd9Sstevel@tonic-gate 	if (grp->lg_addr_fixed) {
13397c478bd9Sstevel@tonic-gate 		/* validate specified address */
13407c478bd9Sstevel@tonic-gate 		if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) {
13417c478bd9Sstevel@tonic-gate 			err = EINVAL;
13427c478bd9Sstevel@tonic-gate 			goto bail;
13437c478bd9Sstevel@tonic-gate 		}
13447c478bd9Sstevel@tonic-gate 		bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
13457c478bd9Sstevel@tonic-gate 	} else {
13467c478bd9Sstevel@tonic-gate 		bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
13477c478bd9Sstevel@tonic-gate 		grp->lg_mac_addr_port = grp->lg_ports;
13487c478bd9Sstevel@tonic-gate 	}
13497c478bd9Sstevel@tonic-gate 
13507c478bd9Sstevel@tonic-gate 	/* set the initial group capabilities */
13517c478bd9Sstevel@tonic-gate 	aggr_grp_capab_set(grp);
13527c478bd9Sstevel@tonic-gate 
1353d62bc4baSyz147064 	if ((mac = mac_alloc(MAC_VERSION)) == NULL) {
1354d62bc4baSyz147064 		err = ENOMEM;
1355ba2e4443Sseb 		goto bail;
1356d62bc4baSyz147064 	}
1357ba2e4443Sseb 	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1358ba2e4443Sseb 	mac->m_driver = grp;
1359ba2e4443Sseb 	mac->m_dip = aggr_dip;
1360d62bc4baSyz147064 	mac->m_instance = grp->lg_key > AGGR_MAX_KEY ? (uint_t)-1 : grp->lg_key;
1361ba2e4443Sseb 	mac->m_src_addr = grp->lg_addr;
1362ba2e4443Sseb 	mac->m_callbacks = &aggr_m_callbacks;
1363ba2e4443Sseb 	mac->m_min_sdu = 0;
1364f4420ae7Snd99603 	mac->m_max_sdu = grp->lg_max_sdu = aggr_grp_max_sdu(grp);
1365d62bc4baSyz147064 	mac->m_margin = aggr_grp_max_margin(grp);
1366da14cebeSEric Cheng 	mac->m_v12n = MAC_VIRT_LEVEL1;
1367ba2e4443Sseb 	err = mac_register(mac, &grp->lg_mh);
1368ba2e4443Sseb 	mac_free(mac);
1369ba2e4443Sseb 	if (err != 0)
13707c478bd9Sstevel@tonic-gate 		goto bail;
13717c478bd9Sstevel@tonic-gate 
13722b24ab6bSSebastien Roy 	err = dls_devnet_create(grp->lg_mh, grp->lg_linkid, crgetzoneid(credp));
13732b24ab6bSSebastien Roy 	if (err != 0) {
1374d62bc4baSyz147064 		(void) mac_unregister(grp->lg_mh);
1375da14cebeSEric Cheng 		grp->lg_mh = NULL;
1376d62bc4baSyz147064 		goto bail;
1377d62bc4baSyz147064 	}
1378d62bc4baSyz147064 
1379da14cebeSEric Cheng 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1380da14cebeSEric Cheng 
1381da14cebeSEric Cheng 	/*
1382da14cebeSEric Cheng 	 * Update the MAC address of the constituent ports.
1383da14cebeSEric Cheng 	 * None of the port is attached at this time, the link state of the
1384da14cebeSEric Cheng 	 * aggregation will not change.
1385da14cebeSEric Cheng 	 */
1386da14cebeSEric Cheng 	link_state_changed = aggr_grp_update_ports_mac(grp);
1387da14cebeSEric Cheng 	ASSERT(!link_state_changed);
1388da14cebeSEric Cheng 
1389da14cebeSEric Cheng 	/* update outbound load balancing policy */
1390da14cebeSEric Cheng 	aggr_send_update_policy(grp, policy);
1391da14cebeSEric Cheng 
13927c478bd9Sstevel@tonic-gate 	/* set LACP mode */
13937c478bd9Sstevel@tonic-gate 	aggr_lacp_set_mode(grp, lacp_mode, lacp_timer);
13947c478bd9Sstevel@tonic-gate 
1395c615009fSyz147064 	/*
1396c615009fSyz147064 	 * Attach each port if necessary.
1397c615009fSyz147064 	 */
1398392b1d6eSyz147064 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1399da14cebeSEric Cheng 		/*
1400da14cebeSEric Cheng 		 * Create the pseudo ring for each HW ring of the underlying
1401da14cebeSEric Cheng 		 * port. Note that this is done after the aggr registers the
1402da14cebeSEric Cheng 		 * mac.
1403da14cebeSEric Cheng 		 */
14040dc2366fSVenugopal Iyer 		VERIFY(aggr_add_pseudo_tx_group(port, &grp->lg_tx_group) == 0);
1405da14cebeSEric Cheng 		VERIFY(aggr_add_pseudo_rx_group(port, &grp->lg_rx_group) == 0);
1406da14cebeSEric Cheng 		if (aggr_port_notify_link(grp, port))
1407392b1d6eSyz147064 			link_state_changed = B_TRUE;
1408da14cebeSEric Cheng 
1409da14cebeSEric Cheng 		/*
1410da14cebeSEric Cheng 		 * Initialize the callback functions for this port.
1411da14cebeSEric Cheng 		 */
1412da14cebeSEric Cheng 		aggr_port_init_callbacks(port);
1413392b1d6eSyz147064 	}
1414392b1d6eSyz147064 
1415392b1d6eSyz147064 	if (link_state_changed)
1416392b1d6eSyz147064 		mac_link_update(grp->lg_mh, grp->lg_link_state);
1417c615009fSyz147064 
14187c478bd9Sstevel@tonic-gate 	/* add new group to hash table */
1419d62bc4baSyz147064 	err = mod_hash_insert(aggr_grp_hash, GRP_HASH_KEY(linkid),
1420210db224Sericheng 	    (mod_hash_val_t)grp);
14217c478bd9Sstevel@tonic-gate 	ASSERT(err == 0);
1422210db224Sericheng 	aggr_grp_cnt++;
14237c478bd9Sstevel@tonic-gate 
1424da14cebeSEric Cheng 	mac_perim_exit(mph);
1425210db224Sericheng 	rw_exit(&aggr_grp_lock);
14267c478bd9Sstevel@tonic-gate 	return (0);
14277c478bd9Sstevel@tonic-gate 
14287c478bd9Sstevel@tonic-gate bail:
14297c478bd9Sstevel@tonic-gate 
14304deae11aSyz147064 	grp->lg_closing = B_TRUE;
1431490ed22dSyz147064 
14327c478bd9Sstevel@tonic-gate 	port = grp->lg_ports;
14337c478bd9Sstevel@tonic-gate 	while (port != NULL) {
1434da14cebeSEric Cheng 		aggr_port_t *cport;
1435da14cebeSEric Cheng 
14367c478bd9Sstevel@tonic-gate 		cport = port->lp_next;
14377c478bd9Sstevel@tonic-gate 		aggr_port_delete(port);
14387c478bd9Sstevel@tonic-gate 		port = cport;
14397c478bd9Sstevel@tonic-gate 	}
14407c478bd9Sstevel@tonic-gate 
1441da14cebeSEric Cheng 	/*
1442da14cebeSEric Cheng 	 * Inform the lacp_rx thread to exit.
1443da14cebeSEric Cheng 	 */
1444da14cebeSEric Cheng 	mutex_enter(&grp->lg_lacp_lock);
1445da14cebeSEric Cheng 	grp->lg_lacp_done = B_TRUE;
1446da14cebeSEric Cheng 	cv_signal(&grp->lg_lacp_cv);
1447da14cebeSEric Cheng 	while (grp->lg_lacp_rx_thread != NULL)
1448da14cebeSEric Cheng 		cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
1449da14cebeSEric Cheng 	mutex_exit(&grp->lg_lacp_lock);
14500dc2366fSVenugopal Iyer 	/*
14510dc2366fSVenugopal Iyer 	 * Inform the tx_notify thread to exit.
14520dc2366fSVenugopal Iyer 	 */
14530dc2366fSVenugopal Iyer 	mutex_enter(&grp->lg_tx_flowctl_lock);
14540dc2366fSVenugopal Iyer 	if (grp->lg_tx_notify_thread != NULL) {
14550dc2366fSVenugopal Iyer 		tid = grp->lg_tx_notify_thread->t_did;
14560dc2366fSVenugopal Iyer 		grp->lg_tx_notify_done = B_TRUE;
14570dc2366fSVenugopal Iyer 		cv_signal(&grp->lg_tx_flowctl_cv);
14580dc2366fSVenugopal Iyer 	}
14590dc2366fSVenugopal Iyer 	mutex_exit(&grp->lg_tx_flowctl_lock);
14600dc2366fSVenugopal Iyer 	if (tid != 0)
14610dc2366fSVenugopal Iyer 		thread_join(tid);
14627c478bd9Sstevel@tonic-gate 
14630dc2366fSVenugopal Iyer 	kmem_free(grp->lg_tx_blocked_rings,
14640dc2366fSVenugopal Iyer 	    (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP));
1465210db224Sericheng 	rw_exit(&aggr_grp_lock);
1466da14cebeSEric Cheng 	AGGR_GRP_REFRELE(grp);
14677c478bd9Sstevel@tonic-gate 	return (err);
14687c478bd9Sstevel@tonic-gate }
14697c478bd9Sstevel@tonic-gate 
14707c478bd9Sstevel@tonic-gate /*
1471d62bc4baSyz147064  * Return a pointer to the member of a group with specified linkid.
14727c478bd9Sstevel@tonic-gate  */
14737c478bd9Sstevel@tonic-gate static aggr_port_t *
aggr_grp_port_lookup(aggr_grp_t * grp,datalink_id_t linkid)1474d62bc4baSyz147064 aggr_grp_port_lookup(aggr_grp_t *grp, datalink_id_t linkid)
14757c478bd9Sstevel@tonic-gate {
14767c478bd9Sstevel@tonic-gate 	aggr_port_t *port;
14777c478bd9Sstevel@tonic-gate 
1478da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
14797c478bd9Sstevel@tonic-gate 
14807c478bd9Sstevel@tonic-gate 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1481d62bc4baSyz147064 		if (port->lp_linkid == linkid)
14827c478bd9Sstevel@tonic-gate 			break;
14837c478bd9Sstevel@tonic-gate 	}
14847c478bd9Sstevel@tonic-gate 
14857c478bd9Sstevel@tonic-gate 	return (port);
14867c478bd9Sstevel@tonic-gate }
14877c478bd9Sstevel@tonic-gate 
14887c478bd9Sstevel@tonic-gate /*
14897c478bd9Sstevel@tonic-gate  * Stop, detach and remove a port from a link aggregation group.
14907c478bd9Sstevel@tonic-gate  */
14917c478bd9Sstevel@tonic-gate static int
aggr_grp_rem_port(aggr_grp_t * grp,aggr_port_t * port,boolean_t * mac_addr_changedp,boolean_t * link_state_changedp)14924deae11aSyz147064 aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port,
14934deae11aSyz147064     boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
14947c478bd9Sstevel@tonic-gate {
14954deae11aSyz147064 	int rc = 0;
14967c478bd9Sstevel@tonic-gate 	aggr_port_t **pport;
14974deae11aSyz147064 	boolean_t mac_addr_changed = B_FALSE;
14984deae11aSyz147064 	boolean_t link_state_changed = B_FALSE;
1499da14cebeSEric Cheng 	mac_perim_handle_t mph;
15007c478bd9Sstevel@tonic-gate 	uint64_t val;
15017c478bd9Sstevel@tonic-gate 	uint_t i;
1502ba2e4443Sseb 	uint_t stat;
15037c478bd9Sstevel@tonic-gate 
1504da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
15057c478bd9Sstevel@tonic-gate 	ASSERT(grp->lg_nports > 1);
15064deae11aSyz147064 	ASSERT(!grp->lg_closing);
15077c478bd9Sstevel@tonic-gate 
15087c478bd9Sstevel@tonic-gate 	/* unlink port */
15097c478bd9Sstevel@tonic-gate 	for (pport = &grp->lg_ports; *pport != port;
15107c478bd9Sstevel@tonic-gate 	    pport = &(*pport)->lp_next) {
15114deae11aSyz147064 		if (*pport == NULL) {
15124deae11aSyz147064 			rc = ENOENT;
15134deae11aSyz147064 			goto done;
15144deae11aSyz147064 		}
15157c478bd9Sstevel@tonic-gate 	}
15167c478bd9Sstevel@tonic-gate 	*pport = port->lp_next;
15177c478bd9Sstevel@tonic-gate 
1518da14cebeSEric Cheng 	mac_perim_enter_by_mh(port->lp_mh, &mph);
15197c478bd9Sstevel@tonic-gate 
15207c478bd9Sstevel@tonic-gate 	/*
15217c478bd9Sstevel@tonic-gate 	 * If the MAC address of the port being removed was assigned
15227c478bd9Sstevel@tonic-gate 	 * to the group, update the group MAC address
15237c478bd9Sstevel@tonic-gate 	 * using the MAC address of a different port.
15247c478bd9Sstevel@tonic-gate 	 */
15257c478bd9Sstevel@tonic-gate 	if (!grp->lg_addr_fixed && grp->lg_mac_addr_port == port) {
15267c478bd9Sstevel@tonic-gate 		/*
15277c478bd9Sstevel@tonic-gate 		 * Set the MAC address of the group to the
15287c478bd9Sstevel@tonic-gate 		 * MAC address of its first port.
15297c478bd9Sstevel@tonic-gate 		 */
15307c478bd9Sstevel@tonic-gate 		bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
15317c478bd9Sstevel@tonic-gate 		grp->lg_mac_addr_port = grp->lg_ports;
15324deae11aSyz147064 		mac_addr_changed = B_TRUE;
15337c478bd9Sstevel@tonic-gate 	}
15347c478bd9Sstevel@tonic-gate 
1535da14cebeSEric Cheng 	link_state_changed = aggr_grp_detach_port(grp, port);
15367c478bd9Sstevel@tonic-gate 
15377c478bd9Sstevel@tonic-gate 	/*
1538ba2e4443Sseb 	 * Add the counter statistics of the ports while it was aggregated
1539ba2e4443Sseb 	 * to the group's residual statistics.  This is done by obtaining
1540ba2e4443Sseb 	 * the current counter from the underlying MAC then subtracting the
1541ba2e4443Sseb 	 * value of the counter at the moment it was added to the
1542ba2e4443Sseb 	 * aggregation.
15437c478bd9Sstevel@tonic-gate 	 */
1544da14cebeSEric Cheng 	for (i = 0; i < MAC_NSTAT; i++) {
1545ba2e4443Sseb 		stat = i + MAC_STAT_MIN;
1546ba2e4443Sseb 		if (!MAC_STAT_ISACOUNTER(stat))
15477c478bd9Sstevel@tonic-gate 			continue;
1548ba2e4443Sseb 		val = aggr_port_stat(port, stat);
15497c478bd9Sstevel@tonic-gate 		val -= port->lp_stat[i];
15507c478bd9Sstevel@tonic-gate 		grp->lg_stat[i] += val;
15517c478bd9Sstevel@tonic-gate 	}
1552da14cebeSEric Cheng 	for (i = 0; i < ETHER_NSTAT; i++) {
1553ba2e4443Sseb 		stat = i + MACTYPE_STAT_MIN;
1554ba2e4443Sseb 		if (!ETHER_STAT_ISACOUNTER(stat))
1555ba2e4443Sseb 			continue;
1556ba2e4443Sseb 		val = aggr_port_stat(port, stat);
1557ba2e4443Sseb 		val -= port->lp_ether_stat[i];
1558ba2e4443Sseb 		grp->lg_ether_stat[i] += val;
1559ba2e4443Sseb 	}
15607c478bd9Sstevel@tonic-gate 
15617c478bd9Sstevel@tonic-gate 	grp->lg_nports--;
1562da14cebeSEric Cheng 	mac_perim_exit(mph);
15637c478bd9Sstevel@tonic-gate 
15640dc2366fSVenugopal Iyer 	aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group);
15657c478bd9Sstevel@tonic-gate 	aggr_port_delete(port);
15667c478bd9Sstevel@tonic-gate 
15677c478bd9Sstevel@tonic-gate 	/*
15687c478bd9Sstevel@tonic-gate 	 * If the group MAC address has changed, update the MAC address of
1569d62bc4baSyz147064 	 * the remaining constituent ports according to the new MAC
15707c478bd9Sstevel@tonic-gate 	 * address of the group.
15717c478bd9Sstevel@tonic-gate 	 */
1572392b1d6eSyz147064 	if (mac_addr_changed && aggr_grp_update_ports_mac(grp))
1573392b1d6eSyz147064 		link_state_changed = B_TRUE;
15747c478bd9Sstevel@tonic-gate 
15754deae11aSyz147064 done:
15764deae11aSyz147064 	if (mac_addr_changedp != NULL)
15774deae11aSyz147064 		*mac_addr_changedp = mac_addr_changed;
15784deae11aSyz147064 	if (link_state_changedp != NULL)
15794deae11aSyz147064 		*link_state_changedp = link_state_changed;
15807c478bd9Sstevel@tonic-gate 
15814deae11aSyz147064 	return (rc);
15827c478bd9Sstevel@tonic-gate }
15837c478bd9Sstevel@tonic-gate 
15847c478bd9Sstevel@tonic-gate /*
15857c478bd9Sstevel@tonic-gate  * Remove one or more ports from an existing link aggregation group.
15867c478bd9Sstevel@tonic-gate  */
15877c478bd9Sstevel@tonic-gate int
aggr_grp_rem_ports(datalink_id_t linkid,uint_t nports,laioc_port_t * ports)1588d62bc4baSyz147064 aggr_grp_rem_ports(datalink_id_t linkid, uint_t nports, laioc_port_t *ports)
15897c478bd9Sstevel@tonic-gate {
15907c478bd9Sstevel@tonic-gate 	int rc = 0, i;
15917c478bd9Sstevel@tonic-gate 	aggr_grp_t *grp = NULL;
15927c478bd9Sstevel@tonic-gate 	aggr_port_t *port;
15934deae11aSyz147064 	boolean_t mac_addr_update = B_FALSE, mac_addr_changed;
15944deae11aSyz147064 	boolean_t link_state_update = B_FALSE, link_state_changed;
1595da14cebeSEric Cheng 	mac_perim_handle_t mph, pmph;
15967c478bd9Sstevel@tonic-gate 
1597d62bc4baSyz147064 	/* get group corresponding to linkid */
1598210db224Sericheng 	rw_enter(&aggr_grp_lock, RW_READER);
1599d62bc4baSyz147064 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1600210db224Sericheng 	    (mod_hash_val_t *)&grp) != 0) {
1601210db224Sericheng 		rw_exit(&aggr_grp_lock);
1602210db224Sericheng 		return (ENOENT);
16037c478bd9Sstevel@tonic-gate 	}
16047c478bd9Sstevel@tonic-gate 	AGGR_GRP_REFHOLD(grp);
1605210db224Sericheng 
1606da14cebeSEric Cheng 	/*
1607da14cebeSEric Cheng 	 * Hold the perimeter so that the aggregation won't be destroyed.
1608da14cebeSEric Cheng 	 */
1609da14cebeSEric Cheng 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1610da14cebeSEric Cheng 	rw_exit(&aggr_grp_lock);
16117c478bd9Sstevel@tonic-gate 
16127c478bd9Sstevel@tonic-gate 	/* we need to keep at least one port per group */
16137c478bd9Sstevel@tonic-gate 	if (nports >= grp->lg_nports) {
16147c478bd9Sstevel@tonic-gate 		rc = EINVAL;
16157c478bd9Sstevel@tonic-gate 		goto bail;
16167c478bd9Sstevel@tonic-gate 	}
16177c478bd9Sstevel@tonic-gate 
16187c478bd9Sstevel@tonic-gate 	/* first verify that all the groups are valid */
16197c478bd9Sstevel@tonic-gate 	for (i = 0; i < nports; i++) {
1620d62bc4baSyz147064 		if (aggr_grp_port_lookup(grp, ports[i].lp_linkid) == NULL) {
16217c478bd9Sstevel@tonic-gate 			/* port not found */
16227c478bd9Sstevel@tonic-gate 			rc = ENOENT;
16237c478bd9Sstevel@tonic-gate 			goto bail;
16247c478bd9Sstevel@tonic-gate 		}
16257c478bd9Sstevel@tonic-gate 	}
16267c478bd9Sstevel@tonic-gate 
1627da14cebeSEric Cheng 	/* clear the promiscous mode for the specified ports */
1628da14cebeSEric Cheng 	for (i = 0; i < nports && rc == 0; i++) {
1629da14cebeSEric Cheng 		/* lookup port */
1630da14cebeSEric Cheng 		port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
1631da14cebeSEric Cheng 		ASSERT(port != NULL);
1632da14cebeSEric Cheng 
1633da14cebeSEric Cheng 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
1634da14cebeSEric Cheng 		rc = aggr_port_promisc(port, B_FALSE);
1635da14cebeSEric Cheng 		mac_perim_exit(pmph);
1636da14cebeSEric Cheng 	}
1637da14cebeSEric Cheng 	if (rc != 0) {
1638da14cebeSEric Cheng 		for (i = 0; i < nports; i++) {
1639da14cebeSEric Cheng 			port = aggr_grp_port_lookup(grp,
1640da14cebeSEric Cheng 			    ports[i].lp_linkid);
1641da14cebeSEric Cheng 			ASSERT(port != NULL);
1642da14cebeSEric Cheng 
1643da14cebeSEric Cheng 			/*
1644da14cebeSEric Cheng 			 * Turn the promiscuous mode back on if it is required
1645da14cebeSEric Cheng 			 * to receive the non-primary address over a port, or
1646da14cebeSEric Cheng 			 * the promiscous mode is enabled over the aggr.
1647da14cebeSEric Cheng 			 */
1648da14cebeSEric Cheng 			mac_perim_enter_by_mh(port->lp_mh, &pmph);
1649da14cebeSEric Cheng 			if (port->lp_started && (grp->lg_promisc ||
1650da14cebeSEric Cheng 			    port->lp_prom_addr != NULL)) {
1651da14cebeSEric Cheng 				(void) aggr_port_promisc(port, B_TRUE);
1652da14cebeSEric Cheng 			}
1653da14cebeSEric Cheng 			mac_perim_exit(pmph);
1654da14cebeSEric Cheng 		}
1655da14cebeSEric Cheng 		goto bail;
1656da14cebeSEric Cheng 	}
1657da14cebeSEric Cheng 
16587c478bd9Sstevel@tonic-gate 	/* remove the specified ports from group */
1659da14cebeSEric Cheng 	for (i = 0; i < nports; i++) {
16607c478bd9Sstevel@tonic-gate 		/* lookup port */
1661d62bc4baSyz147064 		port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
16627c478bd9Sstevel@tonic-gate 		ASSERT(port != NULL);
16637c478bd9Sstevel@tonic-gate 
16647c478bd9Sstevel@tonic-gate 		/* stop port if group has already been started */
16657c478bd9Sstevel@tonic-gate 		if (grp->lg_started) {
1666da14cebeSEric Cheng 			mac_perim_enter_by_mh(port->lp_mh, &pmph);
16677c478bd9Sstevel@tonic-gate 			aggr_port_stop(port);
1668da14cebeSEric Cheng 			mac_perim_exit(pmph);
16697c478bd9Sstevel@tonic-gate 		}
16707c478bd9Sstevel@tonic-gate 
16710dc2366fSVenugopal Iyer 		/*
16720dc2366fSVenugopal Iyer 		 * aggr_rem_pseudo_tx_group() is not called here. Instead
16730dc2366fSVenugopal Iyer 		 * it is called from inside aggr_grp_rem_port() after the
16740dc2366fSVenugopal Iyer 		 * port has been detached. The reason is that
16750dc2366fSVenugopal Iyer 		 * aggr_rem_pseudo_tx_group() removes one ring at a time
16760dc2366fSVenugopal Iyer 		 * and if there is still traffic going on, then there
16770dc2366fSVenugopal Iyer 		 * is the possibility of aggr_find_tx_ring() returning a
16780dc2366fSVenugopal Iyer 		 * removed ring for transmission. Once the port has been
16790dc2366fSVenugopal Iyer 		 * detached, that port will not be used and
16800dc2366fSVenugopal Iyer 		 * aggr_find_tx_ring() will not return any rings
16810dc2366fSVenugopal Iyer 		 * belonging to it.
16820dc2366fSVenugopal Iyer 		 */
1683da14cebeSEric Cheng 		aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
16840dc2366fSVenugopal Iyer 
16857c478bd9Sstevel@tonic-gate 		/* remove port from group */
16864deae11aSyz147064 		rc = aggr_grp_rem_port(grp, port, &mac_addr_changed,
16874deae11aSyz147064 		    &link_state_changed);
16887c478bd9Sstevel@tonic-gate 		ASSERT(rc == 0);
16894deae11aSyz147064 		mac_addr_update = mac_addr_update || mac_addr_changed;
16904deae11aSyz147064 		link_state_update = link_state_update || link_state_changed;
16917c478bd9Sstevel@tonic-gate 	}
16927c478bd9Sstevel@tonic-gate 
16937c478bd9Sstevel@tonic-gate bail:
16944deae11aSyz147064 	if (mac_addr_update)
1695ba2e4443Sseb 		mac_unicst_update(grp->lg_mh, grp->lg_addr);
16964deae11aSyz147064 	if (link_state_update)
1697ba2e4443Sseb 		mac_link_update(grp->lg_mh, grp->lg_link_state);
1698da14cebeSEric Cheng 
1699da14cebeSEric Cheng 	mac_perim_exit(mph);
17007c478bd9Sstevel@tonic-gate 	AGGR_GRP_REFRELE(grp);
17017c478bd9Sstevel@tonic-gate 
17027c478bd9Sstevel@tonic-gate 	return (rc);
17037c478bd9Sstevel@tonic-gate }
17047c478bd9Sstevel@tonic-gate 
17057c478bd9Sstevel@tonic-gate int
aggr_grp_delete(datalink_id_t linkid,cred_t * cred)17062b24ab6bSSebastien Roy aggr_grp_delete(datalink_id_t linkid, cred_t *cred)
17077c478bd9Sstevel@tonic-gate {
1708210db224Sericheng 	aggr_grp_t *grp = NULL;
17097c478bd9Sstevel@tonic-gate 	aggr_port_t *port, *cport;
1710d62bc4baSyz147064 	datalink_id_t tmpid;
1711210db224Sericheng 	mod_hash_val_t val;
1712da14cebeSEric Cheng 	mac_perim_handle_t mph, pmph;
17130466663dSyz147064 	int err;
17140dc2366fSVenugopal Iyer 	kt_did_t tid = 0;
17157c478bd9Sstevel@tonic-gate 
1716210db224Sericheng 	rw_enter(&aggr_grp_lock, RW_WRITER);
17177c478bd9Sstevel@tonic-gate 
1718d62bc4baSyz147064 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1719210db224Sericheng 	    (mod_hash_val_t *)&grp) != 0) {
1720210db224Sericheng 		rw_exit(&aggr_grp_lock);
1721210db224Sericheng 		return (ENOENT);
17227c478bd9Sstevel@tonic-gate 	}
1723490ed22dSyz147064 
1724d62bc4baSyz147064 	/*
1725d62bc4baSyz147064 	 * Note that dls_devnet_destroy() must be called before lg_lock is
1726d62bc4baSyz147064 	 * held. Otherwise, it will deadlock if another thread is in
1727d62bc4baSyz147064 	 * aggr_m_stat() and thus has a kstat_hold() on the kstats that
1728d62bc4baSyz147064 	 * dls_devnet_destroy() needs to delete.
1729d62bc4baSyz147064 	 */
1730da14cebeSEric Cheng 	if ((err = dls_devnet_destroy(grp->lg_mh, &tmpid, B_TRUE)) != 0) {
1731d62bc4baSyz147064 		rw_exit(&aggr_grp_lock);
1732d62bc4baSyz147064 		return (err);
1733d62bc4baSyz147064 	}
1734d62bc4baSyz147064 	ASSERT(linkid == tmpid);
1735d62bc4baSyz147064 
17367c478bd9Sstevel@tonic-gate 	/*
17377c478bd9Sstevel@tonic-gate 	 * Unregister from the MAC service module. Since this can
17387c478bd9Sstevel@tonic-gate 	 * fail if a client hasn't closed the MAC port, we gracefully
17397c478bd9Sstevel@tonic-gate 	 * fail the operation.
17407c478bd9Sstevel@tonic-gate 	 */
17410466663dSyz147064 	if ((err = mac_disable(grp->lg_mh)) != 0) {
17422b24ab6bSSebastien Roy 		(void) dls_devnet_create(grp->lg_mh, linkid, crgetzoneid(cred));
1743210db224Sericheng 		rw_exit(&aggr_grp_lock);
17440466663dSyz147064 		return (err);
17457c478bd9Sstevel@tonic-gate 	}
1746d62bc4baSyz147064 	(void) mod_hash_remove(aggr_grp_hash, GRP_HASH_KEY(linkid), &val);
1747210db224Sericheng 	ASSERT(grp == (aggr_grp_t *)val);
17487c478bd9Sstevel@tonic-gate 
1749210db224Sericheng 	ASSERT(aggr_grp_cnt > 0);
1750210db224Sericheng 	aggr_grp_cnt--;
1751210db224Sericheng 	rw_exit(&aggr_grp_lock);
17527c478bd9Sstevel@tonic-gate 
1753da14cebeSEric Cheng 	/*
1754da14cebeSEric Cheng 	 * Inform the lacp_rx thread to exit.
1755da14cebeSEric Cheng 	 */
1756da14cebeSEric Cheng 	mutex_enter(&grp->lg_lacp_lock);
1757da14cebeSEric Cheng 	grp->lg_lacp_done = B_TRUE;
1758da14cebeSEric Cheng 	cv_signal(&grp->lg_lacp_cv);
1759da14cebeSEric Cheng 	while (grp->lg_lacp_rx_thread != NULL)
1760da14cebeSEric Cheng 		cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
1761da14cebeSEric Cheng 	mutex_exit(&grp->lg_lacp_lock);
17620dc2366fSVenugopal Iyer 	/*
17630dc2366fSVenugopal Iyer 	 * Inform the tx_notify_thread to exit.
17640dc2366fSVenugopal Iyer 	 */
17650dc2366fSVenugopal Iyer 	mutex_enter(&grp->lg_tx_flowctl_lock);
17660dc2366fSVenugopal Iyer 	if (grp->lg_tx_notify_thread != NULL) {
17670dc2366fSVenugopal Iyer 		tid = grp->lg_tx_notify_thread->t_did;
17680dc2366fSVenugopal Iyer 		grp->lg_tx_notify_done = B_TRUE;
17690dc2366fSVenugopal Iyer 		cv_signal(&grp->lg_tx_flowctl_cv);
17700dc2366fSVenugopal Iyer 	}
17710dc2366fSVenugopal Iyer 	mutex_exit(&grp->lg_tx_flowctl_lock);
17720dc2366fSVenugopal Iyer 	if (tid != 0)
17730dc2366fSVenugopal Iyer 		thread_join(tid);
1774da14cebeSEric Cheng 
1775da14cebeSEric Cheng 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1776da14cebeSEric Cheng 
1777da14cebeSEric Cheng 	grp->lg_closing = B_TRUE;
1778da14cebeSEric Cheng 	/* detach and free MAC ports associated with group */
1779da14cebeSEric Cheng 	port = grp->lg_ports;
1780da14cebeSEric Cheng 	while (port != NULL) {
1781da14cebeSEric Cheng 		cport = port->lp_next;
1782da14cebeSEric Cheng 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
1783da14cebeSEric Cheng 		if (grp->lg_started)
1784da14cebeSEric Cheng 			aggr_port_stop(port);
1785da14cebeSEric Cheng 		(void) aggr_grp_detach_port(grp, port);
1786da14cebeSEric Cheng 		mac_perim_exit(pmph);
17870dc2366fSVenugopal Iyer 		aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group);
1788da14cebeSEric Cheng 		aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
1789da14cebeSEric Cheng 		aggr_port_delete(port);
1790da14cebeSEric Cheng 		port = cport;
1791da14cebeSEric Cheng 	}
1792da14cebeSEric Cheng 
1793da14cebeSEric Cheng 	mac_perim_exit(mph);
1794da14cebeSEric Cheng 
17950dc2366fSVenugopal Iyer 	kmem_free(grp->lg_tx_blocked_rings,
17960dc2366fSVenugopal Iyer 	    (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP));
1797da14cebeSEric Cheng 	/*
1798da14cebeSEric Cheng 	 * Wait for the port's lacp timer thread and its notification callback
1799da14cebeSEric Cheng 	 * to exit before calling mac_unregister() since both needs to access
1800da14cebeSEric Cheng 	 * the mac perimeter of the grp.
1801da14cebeSEric Cheng 	 */
1802da14cebeSEric Cheng 	aggr_grp_port_wait(grp);
1803da14cebeSEric Cheng 
1804da14cebeSEric Cheng 	VERIFY(mac_unregister(grp->lg_mh) == 0);
1805da14cebeSEric Cheng 	grp->lg_mh = NULL;
1806da14cebeSEric Cheng 
1807da14cebeSEric Cheng 	AGGR_GRP_REFRELE(grp);
18087c478bd9Sstevel@tonic-gate 	return (0);
18097c478bd9Sstevel@tonic-gate }
18107c478bd9Sstevel@tonic-gate 
18117c478bd9Sstevel@tonic-gate void
aggr_grp_free(aggr_grp_t * grp)18127c478bd9Sstevel@tonic-gate aggr_grp_free(aggr_grp_t *grp)
18137c478bd9Sstevel@tonic-gate {
18147c478bd9Sstevel@tonic-gate 	ASSERT(grp->lg_refs == 0);
1815da14cebeSEric Cheng 	ASSERT(grp->lg_port_ref == 0);
1816d62bc4baSyz147064 	if (grp->lg_key > AGGR_MAX_KEY) {
1817d62bc4baSyz147064 		id_free(key_ids, grp->lg_key);
1818d62bc4baSyz147064 		grp->lg_key = 0;
1819d62bc4baSyz147064 	}
18207c478bd9Sstevel@tonic-gate 	kmem_cache_free(aggr_grp_cache, grp);
18217c478bd9Sstevel@tonic-gate }
18227c478bd9Sstevel@tonic-gate 
1823d62bc4baSyz147064 int
aggr_grp_info(datalink_id_t linkid,void * fn_arg,aggr_grp_info_new_grp_fn_t new_grp_fn,aggr_grp_info_new_port_fn_t new_port_fn,cred_t * cred)1824d62bc4baSyz147064 aggr_grp_info(datalink_id_t linkid, void *fn_arg,
1825d62bc4baSyz147064     aggr_grp_info_new_grp_fn_t new_grp_fn,
18262b24ab6bSSebastien Roy     aggr_grp_info_new_port_fn_t new_port_fn, cred_t *cred)
18277c478bd9Sstevel@tonic-gate {
18287c478bd9Sstevel@tonic-gate 	aggr_grp_t	*grp;
18297c478bd9Sstevel@tonic-gate 	aggr_port_t	*port;
1830da14cebeSEric Cheng 	mac_perim_handle_t mph, pmph;
1831d62bc4baSyz147064 	int		rc = 0;
18327c478bd9Sstevel@tonic-gate 
18332b24ab6bSSebastien Roy 	/*
18342b24ab6bSSebastien Roy 	 * Make sure that the aggregation link is visible from the caller's
18352b24ab6bSSebastien Roy 	 * zone.
18362b24ab6bSSebastien Roy 	 */
18372b24ab6bSSebastien Roy 	if (!dls_devnet_islinkvisible(linkid, crgetzoneid(cred)))
18382b24ab6bSSebastien Roy 		return (ENOENT);
18392b24ab6bSSebastien Roy 
1840d62bc4baSyz147064 	rw_enter(&aggr_grp_lock, RW_READER);
18417c478bd9Sstevel@tonic-gate 
1842d62bc4baSyz147064 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1843d62bc4baSyz147064 	    (mod_hash_val_t *)&grp) != 0) {
1844d62bc4baSyz147064 		rw_exit(&aggr_grp_lock);
1845d62bc4baSyz147064 		return (ENOENT);
1846d62bc4baSyz147064 	}
1847da14cebeSEric Cheng 	AGGR_GRP_REFHOLD(grp);
18487c478bd9Sstevel@tonic-gate 
1849da14cebeSEric Cheng 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1850da14cebeSEric Cheng 	rw_exit(&aggr_grp_lock);
18517c478bd9Sstevel@tonic-gate 
1852d62bc4baSyz147064 	rc = new_grp_fn(fn_arg, grp->lg_linkid,
1853d62bc4baSyz147064 	    (grp->lg_key > AGGR_MAX_KEY) ? 0 : grp->lg_key, grp->lg_addr,
1854d62bc4baSyz147064 	    grp->lg_addr_fixed, grp->lg_force, grp->lg_tx_policy,
18557c478bd9Sstevel@tonic-gate 	    grp->lg_nports, grp->lg_lacp_mode, grp->aggr.PeriodicTimer);
18567c478bd9Sstevel@tonic-gate 
1857d62bc4baSyz147064 	if (rc != 0)
18587c478bd9Sstevel@tonic-gate 		goto bail;
18597c478bd9Sstevel@tonic-gate 
18607c478bd9Sstevel@tonic-gate 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1861da14cebeSEric Cheng 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
1862d62bc4baSyz147064 		rc = new_port_fn(fn_arg, port->lp_linkid, port->lp_addr,
1863d62bc4baSyz147064 		    port->lp_state, &port->lp_lacp.ActorOperPortState);
1864da14cebeSEric Cheng 		mac_perim_exit(pmph);
18657c478bd9Sstevel@tonic-gate 
1866d62bc4baSyz147064 		if (rc != 0)
18677c478bd9Sstevel@tonic-gate 			goto bail;
18687c478bd9Sstevel@tonic-gate 	}
18697c478bd9Sstevel@tonic-gate 
18707c478bd9Sstevel@tonic-gate bail:
1871da14cebeSEric Cheng 	mac_perim_exit(mph);
1872da14cebeSEric Cheng 	AGGR_GRP_REFRELE(grp);
18737c478bd9Sstevel@tonic-gate 	return (rc);
18747c478bd9Sstevel@tonic-gate }
18757c478bd9Sstevel@tonic-gate 
18767c478bd9Sstevel@tonic-gate /*ARGSUSED*/
18777c478bd9Sstevel@tonic-gate static void
aggr_m_ioctl(void * arg,queue_t * q,mblk_t * mp)18787c478bd9Sstevel@tonic-gate aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
18797c478bd9Sstevel@tonic-gate {
18807c478bd9Sstevel@tonic-gate 	miocnak(q, mp, 0, ENOTSUP);
18817c478bd9Sstevel@tonic-gate }
18827c478bd9Sstevel@tonic-gate 
1883ba2e4443Sseb static int
aggr_grp_stat(aggr_grp_t * grp,uint_t stat,uint64_t * val)1884ba2e4443Sseb aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val)
1885ba2e4443Sseb {
1886ba2e4443Sseb 	aggr_port_t	*port;
1887ba2e4443Sseb 	uint_t		stat_index;
1888ba2e4443Sseb 
1889ba2e4443Sseb 	/* We only aggregate counter statistics. */
1890ba2e4443Sseb 	if (IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat) ||
1891ba2e4443Sseb 	    IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat)) {
1892ba2e4443Sseb 		return (ENOTSUP);
1893ba2e4443Sseb 	}
1894ba2e4443Sseb 
1895ba2e4443Sseb 	/*
1896ba2e4443Sseb 	 * Counter statistics for a group are computed by aggregating the
1897ba2e4443Sseb 	 * counters of the members MACs while they were aggregated, plus
1898ba2e4443Sseb 	 * the residual counter of the group itself, which is updated each
1899ba2e4443Sseb 	 * time a MAC is removed from the group.
1900ba2e4443Sseb 	 */
1901ba2e4443Sseb 	*val = 0;
1902ba2e4443Sseb 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1903ba2e4443Sseb 		/* actual port statistic */
1904ba2e4443Sseb 		*val += aggr_port_stat(port, stat);
1905ba2e4443Sseb 		/*
1906ba2e4443Sseb 		 * minus the port stat when it was added, plus any residual
1907d62bc4baSyz147064 		 * amount for the group.
1908ba2e4443Sseb 		 */
1909ba2e4443Sseb 		if (IS_MAC_STAT(stat)) {
1910ba2e4443Sseb 			stat_index = stat - MAC_STAT_MIN;
1911ba2e4443Sseb 			*val -= port->lp_stat[stat_index];
1912ba2e4443Sseb 			*val += grp->lg_stat[stat_index];
1913ba2e4443Sseb 		} else if (IS_MACTYPE_STAT(stat)) {
1914ba2e4443Sseb 			stat_index = stat - MACTYPE_STAT_MIN;
1915ba2e4443Sseb 			*val -= port->lp_ether_stat[stat_index];
1916ba2e4443Sseb 			*val += grp->lg_ether_stat[stat_index];
1917ba2e4443Sseb 		}
1918ba2e4443Sseb 	}
1919ba2e4443Sseb 	return (0);
1920ba2e4443Sseb }
1921ba2e4443Sseb 
19220dc2366fSVenugopal Iyer int
aggr_rx_ring_stat(mac_ring_driver_t rdriver,uint_t stat,uint64_t * val)19230dc2366fSVenugopal Iyer aggr_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
19240dc2366fSVenugopal Iyer {
19250dc2366fSVenugopal Iyer 	aggr_pseudo_rx_ring_t   *rx_ring = (aggr_pseudo_rx_ring_t *)rdriver;
19260dc2366fSVenugopal Iyer 
19270dc2366fSVenugopal Iyer 	if (rx_ring->arr_hw_rh != NULL) {
19280dc2366fSVenugopal Iyer 		*val = mac_pseudo_rx_ring_stat_get(rx_ring->arr_hw_rh, stat);
19290dc2366fSVenugopal Iyer 	} else {
19300dc2366fSVenugopal Iyer 		aggr_port_t	*port = rx_ring->arr_port;
19310dc2366fSVenugopal Iyer 
19320dc2366fSVenugopal Iyer 		*val = mac_stat_get(port->lp_mh, stat);
19330dc2366fSVenugopal Iyer 
19340dc2366fSVenugopal Iyer 	}
19350dc2366fSVenugopal Iyer 	return (0);
19360dc2366fSVenugopal Iyer }
19370dc2366fSVenugopal Iyer 
19380dc2366fSVenugopal Iyer int
aggr_tx_ring_stat(mac_ring_driver_t rdriver,uint_t stat,uint64_t * val)19390dc2366fSVenugopal Iyer aggr_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
19400dc2366fSVenugopal Iyer {
19410dc2366fSVenugopal Iyer 	aggr_pseudo_tx_ring_t   *tx_ring = (aggr_pseudo_tx_ring_t *)rdriver;
19420dc2366fSVenugopal Iyer 
19430dc2366fSVenugopal Iyer 	if (tx_ring->atr_hw_rh != NULL) {
19440dc2366fSVenugopal Iyer 		*val = mac_pseudo_tx_ring_stat_get(tx_ring->atr_hw_rh, stat);
19450dc2366fSVenugopal Iyer 	} else {
19460dc2366fSVenugopal Iyer 		aggr_port_t	*port = tx_ring->atr_port;
19470dc2366fSVenugopal Iyer 
19480dc2366fSVenugopal Iyer 		*val = mac_stat_get(port->lp_mh, stat);
19490dc2366fSVenugopal Iyer 	}
19500dc2366fSVenugopal Iyer 	return (0);
19510dc2366fSVenugopal Iyer }
19520dc2366fSVenugopal Iyer 
1953ba2e4443Sseb static int
aggr_m_stat(void * arg,uint_t stat,uint64_t * val)1954ba2e4443Sseb aggr_m_stat(void *arg, uint_t stat, uint64_t *val)
19557c478bd9Sstevel@tonic-gate {
19567c478bd9Sstevel@tonic-gate 	aggr_grp_t		*grp = arg;
1957da14cebeSEric Cheng 	mac_perim_handle_t	mph;
1958ba2e4443Sseb 	int			rval = 0;
19597c478bd9Sstevel@tonic-gate 
1960da14cebeSEric Cheng 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
19617c478bd9Sstevel@tonic-gate 
19627c478bd9Sstevel@tonic-gate 	switch (stat) {
19637c478bd9Sstevel@tonic-gate 	case MAC_STAT_IFSPEED:
1964ba2e4443Sseb 		*val = grp->lg_ifspeed;
19657c478bd9Sstevel@tonic-gate 		break;
1966ba2e4443Sseb 
1967ba2e4443Sseb 	case ETHER_STAT_LINK_DUPLEX:
1968ba2e4443Sseb 		*val = grp->lg_link_duplex;
19697c478bd9Sstevel@tonic-gate 		break;
1970ba2e4443Sseb 
19717c478bd9Sstevel@tonic-gate 	default:
19727c478bd9Sstevel@tonic-gate 		/*
1973ba2e4443Sseb 		 * For all other statistics, we return the aggregated stat
1974ba2e4443Sseb 		 * from the underlying ports.  aggr_grp_stat() will set
1975ba2e4443Sseb 		 * rval appropriately if the statistic isn't a counter.
19767c478bd9Sstevel@tonic-gate 		 */
1977ba2e4443Sseb 		rval = aggr_grp_stat(grp, stat, val);
19787c478bd9Sstevel@tonic-gate 	}
19797c478bd9Sstevel@tonic-gate 
1980da14cebeSEric Cheng 	mac_perim_exit(mph);
1981ba2e4443Sseb 	return (rval);
19827c478bd9Sstevel@tonic-gate }
19837c478bd9Sstevel@tonic-gate 
19847c478bd9Sstevel@tonic-gate static int
aggr_m_start(void * arg)19857c478bd9Sstevel@tonic-gate aggr_m_start(void *arg)
19867c478bd9Sstevel@tonic-gate {
19877c478bd9Sstevel@tonic-gate 	aggr_grp_t *grp = arg;
19887c478bd9Sstevel@tonic-gate 	aggr_port_t *port;
1989da14cebeSEric Cheng 	mac_perim_handle_t mph, pmph;
19907c478bd9Sstevel@tonic-gate 
1991da14cebeSEric Cheng 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
19927c478bd9Sstevel@tonic-gate 
19937c478bd9Sstevel@tonic-gate 	/*
19947c478bd9Sstevel@tonic-gate 	 * Attempts to start all configured members of the group.
19957c478bd9Sstevel@tonic-gate 	 * Group members will be attached when their link-up notification
19967c478bd9Sstevel@tonic-gate 	 * is received.
19977c478bd9Sstevel@tonic-gate 	 */
19987c478bd9Sstevel@tonic-gate 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1999da14cebeSEric Cheng 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
20007c478bd9Sstevel@tonic-gate 		if (aggr_port_start(port) != 0) {
2001da14cebeSEric Cheng 			mac_perim_exit(pmph);
20027c478bd9Sstevel@tonic-gate 			continue;
20037c478bd9Sstevel@tonic-gate 		}
20047c478bd9Sstevel@tonic-gate 
2005da14cebeSEric Cheng 		/*
2006da14cebeSEric Cheng 		 * Turn on the promiscuous mode if it is required to receive
2007da14cebeSEric Cheng 		 * the non-primary address over a port, or the promiscous
2008da14cebeSEric Cheng 		 * mode is enabled over the aggr.
2009da14cebeSEric Cheng 		 */
2010da14cebeSEric Cheng 		if (grp->lg_promisc || port->lp_prom_addr != NULL) {
2011da14cebeSEric Cheng 			if (aggr_port_promisc(port, B_TRUE) != 0)
20127c478bd9Sstevel@tonic-gate 				aggr_port_stop(port);
2013da14cebeSEric Cheng 		}
2014da14cebeSEric Cheng 		mac_perim_exit(pmph);
20157c478bd9Sstevel@tonic-gate 	}
20167c478bd9Sstevel@tonic-gate 
20177c478bd9Sstevel@tonic-gate 	grp->lg_started = B_TRUE;
20187c478bd9Sstevel@tonic-gate 
2019da14cebeSEric Cheng 	mac_perim_exit(mph);
20207c478bd9Sstevel@tonic-gate 	return (0);
20217c478bd9Sstevel@tonic-gate }
20227c478bd9Sstevel@tonic-gate 
20237c478bd9Sstevel@tonic-gate static void
aggr_m_stop(void * arg)20247c478bd9Sstevel@tonic-gate aggr_m_stop(void *arg)
20257c478bd9Sstevel@tonic-gate {
20267c478bd9Sstevel@tonic-gate 	aggr_grp_t *grp = arg;
20277c478bd9Sstevel@tonic-gate 	aggr_port_t *port;
2028da14cebeSEric Cheng 	mac_perim_handle_t mph, pmph;
20297c478bd9Sstevel@tonic-gate 
2030da14cebeSEric Cheng 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
20317c478bd9Sstevel@tonic-gate 
20327c478bd9Sstevel@tonic-gate 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2033da14cebeSEric Cheng 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
2034da14cebeSEric Cheng 
2035da14cebeSEric Cheng 		/* reset port promiscuous mode */
2036da14cebeSEric Cheng 		(void) aggr_port_promisc(port, B_FALSE);
2037da14cebeSEric Cheng 
20387c478bd9Sstevel@tonic-gate 		aggr_port_stop(port);
2039da14cebeSEric Cheng 		mac_perim_exit(pmph);
20407c478bd9Sstevel@tonic-gate 	}
20417c478bd9Sstevel@tonic-gate 
20427c478bd9Sstevel@tonic-gate 	grp->lg_started = B_FALSE;
2043da14cebeSEric Cheng 	mac_perim_exit(mph);
20447c478bd9Sstevel@tonic-gate }
20457c478bd9Sstevel@tonic-gate 
20467c478bd9Sstevel@tonic-gate static int
aggr_m_promisc(void * arg,boolean_t on)20477c478bd9Sstevel@tonic-gate aggr_m_promisc(void *arg, boolean_t on)
20487c478bd9Sstevel@tonic-gate {
20497c478bd9Sstevel@tonic-gate 	aggr_grp_t *grp = arg;
20507c478bd9Sstevel@tonic-gate 	aggr_port_t *port;
20514deae11aSyz147064 	boolean_t link_state_changed = B_FALSE;
2052da14cebeSEric Cheng 	mac_perim_handle_t mph, pmph;
20537c478bd9Sstevel@tonic-gate 
20547c478bd9Sstevel@tonic-gate 	AGGR_GRP_REFHOLD(grp);
2055da14cebeSEric Cheng 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
20567c478bd9Sstevel@tonic-gate 
20574deae11aSyz147064 	ASSERT(!grp->lg_closing);
20584deae11aSyz147064 
20597c478bd9Sstevel@tonic-gate 	if (on == grp->lg_promisc)
20607c478bd9Sstevel@tonic-gate 		goto bail;
20617c478bd9Sstevel@tonic-gate 
20627c478bd9Sstevel@tonic-gate 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2063da14cebeSEric Cheng 		int	err = 0;
2064da14cebeSEric Cheng 
2065da14cebeSEric Cheng 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
20667c478bd9Sstevel@tonic-gate 		AGGR_PORT_REFHOLD(port);
2067da14cebeSEric Cheng 		if (!on && (port->lp_prom_addr == NULL))
2068da14cebeSEric Cheng 			err = aggr_port_promisc(port, B_FALSE);
2069da14cebeSEric Cheng 		else if (on && port->lp_started)
2070da14cebeSEric Cheng 			err = aggr_port_promisc(port, B_TRUE);
2071da14cebeSEric Cheng 
2072da14cebeSEric Cheng 		if (err != 0) {
2073da14cebeSEric Cheng 			if (aggr_grp_detach_port(grp, port))
2074392b1d6eSyz147064 				link_state_changed = B_TRUE;
20754deae11aSyz147064 		} else {
20764deae11aSyz147064 			/*
20774deae11aSyz147064 			 * If a port was detached because of a previous
20784deae11aSyz147064 			 * failure changing the promiscuity, the port
20794deae11aSyz147064 			 * is reattached when it successfully changes
20804deae11aSyz147064 			 * the promiscuity now, and this might cause
20814deae11aSyz147064 			 * the link state of the aggregation to change.
20824deae11aSyz147064 			 */
2083392b1d6eSyz147064 			if (aggr_grp_attach_port(grp, port))
2084392b1d6eSyz147064 				link_state_changed = B_TRUE;
20854deae11aSyz147064 		}
2086da14cebeSEric Cheng 		mac_perim_exit(pmph);
20877c478bd9Sstevel@tonic-gate 		AGGR_PORT_REFRELE(port);
20887c478bd9Sstevel@tonic-gate 	}
20897c478bd9Sstevel@tonic-gate 
20907c478bd9Sstevel@tonic-gate 	grp->lg_promisc = on;
20917c478bd9Sstevel@tonic-gate 
20924deae11aSyz147064 	if (link_state_changed)
2093ba2e4443Sseb 		mac_link_update(grp->lg_mh, grp->lg_link_state);
20944deae11aSyz147064 
20957c478bd9Sstevel@tonic-gate bail:
2096da14cebeSEric Cheng 	mac_perim_exit(mph);
20977c478bd9Sstevel@tonic-gate 	AGGR_GRP_REFRELE(grp);
20987c478bd9Sstevel@tonic-gate 
20997c478bd9Sstevel@tonic-gate 	return (0);
21007c478bd9Sstevel@tonic-gate }
21017c478bd9Sstevel@tonic-gate 
2102da14cebeSEric Cheng static void
aggr_grp_port_rename(const char * new_name,void * arg)2103da14cebeSEric Cheng aggr_grp_port_rename(const char *new_name, void *arg)
2104da14cebeSEric Cheng {
2105da14cebeSEric Cheng 	/*
2106da14cebeSEric Cheng 	 * aggr port's mac client name is the format of "aggr link name" plus
2107da14cebeSEric Cheng 	 * AGGR_PORT_NAME_DELIMIT plus "underneath link name".
2108da14cebeSEric Cheng 	 */
2109da14cebeSEric Cheng 	int aggr_len, link_len, clnt_name_len, i;
2110da14cebeSEric Cheng 	char *str_end, *str_st, *str_del;
2111da14cebeSEric Cheng 	char aggr_name[MAXNAMELEN];
2112da14cebeSEric Cheng 	char link_name[MAXNAMELEN];
2113da14cebeSEric Cheng 	char *clnt_name;
2114da14cebeSEric Cheng 	aggr_grp_t *aggr_grp = arg;
2115da14cebeSEric Cheng 	aggr_port_t *aggr_port = aggr_grp->lg_ports;
2116da14cebeSEric Cheng 
2117da14cebeSEric Cheng 	for (i = 0; i < aggr_grp->lg_nports; i++) {
2118da14cebeSEric Cheng 		clnt_name = mac_client_name(aggr_port->lp_mch);
2119da14cebeSEric Cheng 		clnt_name_len = strlen(clnt_name);
2120da14cebeSEric Cheng 		str_st = clnt_name;
2121da14cebeSEric Cheng 		str_end = &(clnt_name[clnt_name_len]);
2122da14cebeSEric Cheng 		str_del = strchr(str_st, AGGR_PORT_NAME_DELIMIT);
2123da14cebeSEric Cheng 		ASSERT(str_del != NULL);
2124da14cebeSEric Cheng 		aggr_len = (intptr_t)((uintptr_t)str_del - (uintptr_t)str_st);
2125da14cebeSEric Cheng 		link_len = (intptr_t)((uintptr_t)str_end - (uintptr_t)str_del);
2126da14cebeSEric Cheng 		bzero(aggr_name, MAXNAMELEN);
2127da14cebeSEric Cheng 		bzero(link_name, MAXNAMELEN);
2128da14cebeSEric Cheng 		bcopy(clnt_name, aggr_name, aggr_len);
2129da14cebeSEric Cheng 		bcopy(str_del, link_name, link_len + 1);
2130da14cebeSEric Cheng 		bzero(clnt_name, MAXNAMELEN);
2131da14cebeSEric Cheng 		(void) snprintf(clnt_name, MAXNAMELEN, "%s%s", new_name,
2132da14cebeSEric Cheng 		    link_name);
2133da14cebeSEric Cheng 
2134da14cebeSEric Cheng 		(void) mac_rename_primary(aggr_port->lp_mh, NULL);
2135da14cebeSEric Cheng 		aggr_port = aggr_port->lp_next;
2136da14cebeSEric Cheng 	}
2137da14cebeSEric Cheng }
2138da14cebeSEric Cheng 
21397c478bd9Sstevel@tonic-gate /*
2140ba2e4443Sseb  * Initialize the capabilities that are advertised for the group
2141ba2e4443Sseb  * according to the capabilities of the constituent ports.
2142ba2e4443Sseb  */
2143ba2e4443Sseb static boolean_t
aggr_m_capab_get(void * arg,mac_capab_t cap,void * cap_data)2144ba2e4443Sseb aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
2145ba2e4443Sseb {
2146ba2e4443Sseb 	aggr_grp_t *grp = arg;
2147ba2e4443Sseb 
2148ba2e4443Sseb 	switch (cap) {
2149ba2e4443Sseb 	case MAC_CAPAB_HCKSUM: {
2150ba2e4443Sseb 		uint32_t *hcksum_txflags = cap_data;
2151ba2e4443Sseb 		*hcksum_txflags = grp->lg_hcksum_txflags;
2152ba2e4443Sseb 		break;
2153ba2e4443Sseb 	}
215419c868a0SRoamer 	case MAC_CAPAB_LSO: {
215519c868a0SRoamer 		mac_capab_lso_t *cap_lso = cap_data;
215619c868a0SRoamer 
215719c868a0SRoamer 		if (grp->lg_lso) {
215819c868a0SRoamer 			*cap_lso = grp->lg_cap_lso;
215919c868a0SRoamer 			break;
216019c868a0SRoamer 		} else {
216119c868a0SRoamer 			return (B_FALSE);
216219c868a0SRoamer 		}
216319c868a0SRoamer 	}
2164d62bc4baSyz147064 	case MAC_CAPAB_NO_NATIVEVLAN:
2165d62bc4baSyz147064 		return (!grp->lg_vlan);
2166d62bc4baSyz147064 	case MAC_CAPAB_NO_ZCOPY:
2167d62bc4baSyz147064 		return (!grp->lg_zcopy);
2168da14cebeSEric Cheng 	case MAC_CAPAB_RINGS: {
2169da14cebeSEric Cheng 		mac_capab_rings_t *cap_rings = cap_data;
2170da14cebeSEric Cheng 
2171da14cebeSEric Cheng 		if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
2172da14cebeSEric Cheng 			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
2173da14cebeSEric Cheng 			cap_rings->mr_rnum = grp->lg_rx_group.arg_ring_cnt;
2174da14cebeSEric Cheng 
2175da14cebeSEric Cheng 			/*
2176da14cebeSEric Cheng 			 * An aggregation advertises only one (pseudo) RX
2177da14cebeSEric Cheng 			 * group, which virtualizes the main/primary group of
2178da14cebeSEric Cheng 			 * the underlying devices.
2179da14cebeSEric Cheng 			 */
2180da14cebeSEric Cheng 			cap_rings->mr_gnum = 1;
2181da14cebeSEric Cheng 			cap_rings->mr_gaddring = NULL;
2182da14cebeSEric Cheng 			cap_rings->mr_gremring = NULL;
2183da14cebeSEric Cheng 		} else {
21840dc2366fSVenugopal Iyer 			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
21850dc2366fSVenugopal Iyer 			cap_rings->mr_rnum = grp->lg_tx_group.atg_ring_cnt;
21860dc2366fSVenugopal Iyer 			cap_rings->mr_gnum = 0;
2187da14cebeSEric Cheng 		}
21880dc2366fSVenugopal Iyer 		cap_rings->mr_rget = aggr_fill_ring;
21890dc2366fSVenugopal Iyer 		cap_rings->mr_gget = aggr_fill_group;
2190da14cebeSEric Cheng 		break;
2191da14cebeSEric Cheng 	}
2192da14cebeSEric Cheng 	case MAC_CAPAB_AGGR:
2193da14cebeSEric Cheng 	{
2194da14cebeSEric Cheng 		mac_capab_aggr_t *aggr_cap;
2195da14cebeSEric Cheng 
2196da14cebeSEric Cheng 		if (cap_data != NULL) {
2197da14cebeSEric Cheng 			aggr_cap = cap_data;
2198da14cebeSEric Cheng 			aggr_cap->mca_rename_fn = aggr_grp_port_rename;
2199da14cebeSEric Cheng 			aggr_cap->mca_unicst = aggr_m_unicst;
22000dc2366fSVenugopal Iyer 			aggr_cap->mca_find_tx_ring_fn = aggr_find_tx_ring;
22010dc2366fSVenugopal Iyer 			aggr_cap->mca_arg = arg;
2202da14cebeSEric Cheng 		}
2203da14cebeSEric Cheng 		return (B_TRUE);
2204da14cebeSEric Cheng 	}
2205ba2e4443Sseb 	default:
2206ba2e4443Sseb 		return (B_FALSE);
2207ba2e4443Sseb 	}
2208ba2e4443Sseb 	return (B_TRUE);
2209ba2e4443Sseb }
2210ba2e4443Sseb 
2211da14cebeSEric Cheng /*
2212da14cebeSEric Cheng  * Callback funtion for MAC layer to register groups.
2213da14cebeSEric Cheng  */
2214da14cebeSEric Cheng static void
aggr_fill_group(void * arg,mac_ring_type_t rtype,const int index,mac_group_info_t * infop,mac_group_handle_t gh)2215da14cebeSEric Cheng aggr_fill_group(void *arg, mac_ring_type_t rtype, const int index,
2216da14cebeSEric Cheng     mac_group_info_t *infop, mac_group_handle_t gh)
221719599311Sudpa {
2218da14cebeSEric Cheng 	aggr_grp_t *grp = arg;
2219da14cebeSEric Cheng 	aggr_pseudo_rx_group_t *rx_group;
22200dc2366fSVenugopal Iyer 	aggr_pseudo_tx_group_t *tx_group;
222119599311Sudpa 
22220dc2366fSVenugopal Iyer 	ASSERT(index == 0);
22230dc2366fSVenugopal Iyer 	if (rtype == MAC_RING_TYPE_RX) {
2224da14cebeSEric Cheng 		rx_group = &grp->lg_rx_group;
2225da14cebeSEric Cheng 		rx_group->arg_gh = gh;
2226da14cebeSEric Cheng 		rx_group->arg_grp = grp;
222719599311Sudpa 
2228da14cebeSEric Cheng 		infop->mgi_driver = (mac_group_driver_t)rx_group;
2229da14cebeSEric Cheng 		infop->mgi_start = NULL;
2230da14cebeSEric Cheng 		infop->mgi_stop = NULL;
2231da14cebeSEric Cheng 		infop->mgi_addmac = aggr_addmac;
2232da14cebeSEric Cheng 		infop->mgi_remmac = aggr_remmac;
2233da14cebeSEric Cheng 		infop->mgi_count = rx_group->arg_ring_cnt;
22340dc2366fSVenugopal Iyer 	} else {
22350dc2366fSVenugopal Iyer 		tx_group = &grp->lg_tx_group;
22360dc2366fSVenugopal Iyer 		tx_group->atg_gh = gh;
22370dc2366fSVenugopal Iyer 	}
2238da14cebeSEric Cheng }
2239da14cebeSEric Cheng 
2240da14cebeSEric Cheng /*
2241da14cebeSEric Cheng  * Callback funtion for MAC layer to register all rings.
2242da14cebeSEric Cheng  */
2243da14cebeSEric Cheng static void
aggr_fill_ring(void * arg,mac_ring_type_t rtype,const int rg_index,const int index,mac_ring_info_t * infop,mac_ring_handle_t rh)2244da14cebeSEric Cheng aggr_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index,
2245da14cebeSEric Cheng     const int index, mac_ring_info_t *infop, mac_ring_handle_t rh)
2246da14cebeSEric Cheng {
2247da14cebeSEric Cheng 	aggr_grp_t	*grp = arg;
2248da14cebeSEric Cheng 
2249da14cebeSEric Cheng 	switch (rtype) {
2250da14cebeSEric Cheng 	case MAC_RING_TYPE_RX: {
2251da14cebeSEric Cheng 		aggr_pseudo_rx_group_t	*rx_group = &grp->lg_rx_group;
2252da14cebeSEric Cheng 		aggr_pseudo_rx_ring_t	*rx_ring;
2253da14cebeSEric Cheng 		mac_intr_t		aggr_mac_intr;
2254da14cebeSEric Cheng 
2255da14cebeSEric Cheng 		ASSERT(rg_index == 0);
2256da14cebeSEric Cheng 
2257da14cebeSEric Cheng 		ASSERT((index >= 0) && (index < rx_group->arg_ring_cnt));
2258da14cebeSEric Cheng 		rx_ring = rx_group->arg_rings + index;
2259da14cebeSEric Cheng 		rx_ring->arr_rh = rh;
2260da14cebeSEric Cheng 
2261da14cebeSEric Cheng 		/*
2262da14cebeSEric Cheng 		 * Entrypoint to enable interrupt (disable poll) and
2263da14cebeSEric Cheng 		 * disable interrupt (enable poll).
2264da14cebeSEric Cheng 		 */
2265da14cebeSEric Cheng 		aggr_mac_intr.mi_handle = (mac_intr_handle_t)rx_ring;
2266da14cebeSEric Cheng 		aggr_mac_intr.mi_enable = aggr_pseudo_enable_intr;
2267da14cebeSEric Cheng 		aggr_mac_intr.mi_disable = aggr_pseudo_disable_intr;
22680dc2366fSVenugopal Iyer 		aggr_mac_intr.mi_ddi_handle = NULL;
2269da14cebeSEric Cheng 
2270da14cebeSEric Cheng 		infop->mri_driver = (mac_ring_driver_t)rx_ring;
2271da14cebeSEric Cheng 		infop->mri_start = aggr_pseudo_start_ring;
2272da14cebeSEric Cheng 		infop->mri_stop = aggr_pseudo_stop_ring;
2273da14cebeSEric Cheng 
2274da14cebeSEric Cheng 		infop->mri_intr = aggr_mac_intr;
2275da14cebeSEric Cheng 		infop->mri_poll = aggr_rx_poll;
22760dc2366fSVenugopal Iyer 
22770dc2366fSVenugopal Iyer 		infop->mri_stat = aggr_rx_ring_stat;
22780dc2366fSVenugopal Iyer 		break;
22790dc2366fSVenugopal Iyer 	}
22800dc2366fSVenugopal Iyer 	case MAC_RING_TYPE_TX: {
22810dc2366fSVenugopal Iyer 		aggr_pseudo_tx_group_t	*tx_group = &grp->lg_tx_group;
22820dc2366fSVenugopal Iyer 		aggr_pseudo_tx_ring_t	*tx_ring;
22830dc2366fSVenugopal Iyer 
22840dc2366fSVenugopal Iyer 		ASSERT(rg_index == -1);
22850dc2366fSVenugopal Iyer 		ASSERT(index < tx_group->atg_ring_cnt);
22860dc2366fSVenugopal Iyer 
22870dc2366fSVenugopal Iyer 		tx_ring = &tx_group->atg_rings[index];
22880dc2366fSVenugopal Iyer 		tx_ring->atr_rh = rh;
22890dc2366fSVenugopal Iyer 
22900dc2366fSVenugopal Iyer 		infop->mri_driver = (mac_ring_driver_t)tx_ring;
22910dc2366fSVenugopal Iyer 		infop->mri_start = NULL;
22920dc2366fSVenugopal Iyer 		infop->mri_stop = NULL;
22930dc2366fSVenugopal Iyer 		infop->mri_tx = aggr_ring_tx;
22940dc2366fSVenugopal Iyer 		infop->mri_stat = aggr_tx_ring_stat;
22950dc2366fSVenugopal Iyer 		/*
22960dc2366fSVenugopal Iyer 		 * Use the hw TX ring handle to find if the ring needs
22970dc2366fSVenugopal Iyer 		 * serialization or not. For NICs that do not expose
22980dc2366fSVenugopal Iyer 		 * Tx rings, atr_hw_rh will be NULL.
22990dc2366fSVenugopal Iyer 		 */
23000dc2366fSVenugopal Iyer 		if (tx_ring->atr_hw_rh != NULL) {
23010dc2366fSVenugopal Iyer 			infop->mri_flags =
23020dc2366fSVenugopal Iyer 			    mac_hwring_getinfo(tx_ring->atr_hw_rh);
23030dc2366fSVenugopal Iyer 		}
230419599311Sudpa 		break;
230519599311Sudpa 	}
2306da14cebeSEric Cheng 	default:
2307da14cebeSEric Cheng 		break;
230819599311Sudpa 	}
2309da14cebeSEric Cheng }
2310da14cebeSEric Cheng 
2311da14cebeSEric Cheng static mblk_t *
aggr_rx_poll(void * arg,int bytes_to_pickup)2312da14cebeSEric Cheng aggr_rx_poll(void *arg, int bytes_to_pickup)
2313da14cebeSEric Cheng {
2314da14cebeSEric Cheng 	aggr_pseudo_rx_ring_t *rr_ring = arg;
2315da14cebeSEric Cheng 	aggr_port_t *port = rr_ring->arr_port;
2316da14cebeSEric Cheng 	aggr_grp_t *grp = port->lp_grp;
2317da14cebeSEric Cheng 	mblk_t *mp_chain, *mp, **mpp;
2318da14cebeSEric Cheng 
2319da14cebeSEric Cheng 	mp_chain = mac_hwring_poll(rr_ring->arr_hw_rh, bytes_to_pickup);
2320da14cebeSEric Cheng 
2321da14cebeSEric Cheng 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
2322da14cebeSEric Cheng 		return (mp_chain);
2323da14cebeSEric Cheng 
2324da14cebeSEric Cheng 	mpp = &mp_chain;
2325da14cebeSEric Cheng 	while ((mp = *mpp) != NULL) {
2326da14cebeSEric Cheng 		if (MBLKL(mp) >= sizeof (struct ether_header)) {
2327da14cebeSEric Cheng 			struct ether_header *ehp;
2328da14cebeSEric Cheng 
2329da14cebeSEric Cheng 			ehp = (struct ether_header *)mp->b_rptr;
2330da14cebeSEric Cheng 			if (ntohs(ehp->ether_type) == ETHERTYPE_SLOW) {
2331da14cebeSEric Cheng 				*mpp = mp->b_next;
2332da14cebeSEric Cheng 				mp->b_next = NULL;
2333da14cebeSEric Cheng 				aggr_recv_lacp(port,
2334da14cebeSEric Cheng 				    (mac_resource_handle_t)rr_ring, mp);
2335da14cebeSEric Cheng 				continue;
2336da14cebeSEric Cheng 			}
2337da14cebeSEric Cheng 		}
2338da14cebeSEric Cheng 
2339da14cebeSEric Cheng 		if (!port->lp_collector_enabled) {
2340da14cebeSEric Cheng 			*mpp = mp->b_next;
2341da14cebeSEric Cheng 			mp->b_next = NULL;
2342da14cebeSEric Cheng 			freemsg(mp);
2343da14cebeSEric Cheng 			continue;
2344da14cebeSEric Cheng 		}
2345da14cebeSEric Cheng 		mpp = &mp->b_next;
2346da14cebeSEric Cheng 	}
2347da14cebeSEric Cheng 	return (mp_chain);
2348da14cebeSEric Cheng }
2349da14cebeSEric Cheng 
2350da14cebeSEric Cheng static int
aggr_addmac(void * arg,const uint8_t * mac_addr)2351da14cebeSEric Cheng aggr_addmac(void *arg, const uint8_t *mac_addr)
2352da14cebeSEric Cheng {
2353da14cebeSEric Cheng 	aggr_pseudo_rx_group_t	*rx_group = (aggr_pseudo_rx_group_t *)arg;
2354da14cebeSEric Cheng 	aggr_unicst_addr_t	*addr, **pprev;
2355da14cebeSEric Cheng 	aggr_grp_t		*grp = rx_group->arg_grp;
2356da14cebeSEric Cheng 	aggr_port_t		*port, *p;
2357da14cebeSEric Cheng 	mac_perim_handle_t	mph;
2358da14cebeSEric Cheng 	int			err = 0;
2359da14cebeSEric Cheng 
2360da14cebeSEric Cheng 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
2361da14cebeSEric Cheng 
2362da14cebeSEric Cheng 	if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) {
2363da14cebeSEric Cheng 		mac_perim_exit(mph);
236419599311Sudpa 		return (0);
236519599311Sudpa 	}
236619599311Sudpa 
2367ba2e4443Sseb 	/*
2368da14cebeSEric Cheng 	 * Insert this mac address into the list of mac addresses owned by
2369da14cebeSEric Cheng 	 * the aggregation pseudo group.
2370da14cebeSEric Cheng 	 */
2371da14cebeSEric Cheng 	pprev = &rx_group->arg_macaddr;
2372da14cebeSEric Cheng 	while ((addr = *pprev) != NULL) {
2373da14cebeSEric Cheng 		if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) == 0) {
2374da14cebeSEric Cheng 			mac_perim_exit(mph);
2375da14cebeSEric Cheng 			return (EEXIST);
2376da14cebeSEric Cheng 		}
2377da14cebeSEric Cheng 		pprev = &addr->aua_next;
2378da14cebeSEric Cheng 	}
2379da14cebeSEric Cheng 	addr = kmem_alloc(sizeof (aggr_unicst_addr_t), KM_SLEEP);
2380da14cebeSEric Cheng 	bcopy(mac_addr, addr->aua_addr, ETHERADDRL);
2381da14cebeSEric Cheng 	addr->aua_next = NULL;
2382da14cebeSEric Cheng 	*pprev = addr;
2383da14cebeSEric Cheng 
2384da14cebeSEric Cheng 	for (port = grp->lg_ports; port != NULL; port = port->lp_next)
2385da14cebeSEric Cheng 		if ((err = aggr_port_addmac(port, mac_addr)) != 0)
2386da14cebeSEric Cheng 			break;
2387da14cebeSEric Cheng 
2388da14cebeSEric Cheng 	if (err != 0) {
2389da14cebeSEric Cheng 		for (p = grp->lg_ports; p != port; p = p->lp_next)
2390da14cebeSEric Cheng 			aggr_port_remmac(p, mac_addr);
2391da14cebeSEric Cheng 
2392da14cebeSEric Cheng 		*pprev = NULL;
2393da14cebeSEric Cheng 		kmem_free(addr, sizeof (aggr_unicst_addr_t));
2394da14cebeSEric Cheng 	}
2395da14cebeSEric Cheng 
2396da14cebeSEric Cheng 	mac_perim_exit(mph);
2397da14cebeSEric Cheng 	return (err);
2398da14cebeSEric Cheng }
2399da14cebeSEric Cheng 
2400da14cebeSEric Cheng static int
aggr_remmac(void * arg,const uint8_t * mac_addr)2401da14cebeSEric Cheng aggr_remmac(void *arg, const uint8_t *mac_addr)
2402da14cebeSEric Cheng {
2403da14cebeSEric Cheng 	aggr_pseudo_rx_group_t	*rx_group = (aggr_pseudo_rx_group_t *)arg;
2404da14cebeSEric Cheng 	aggr_unicst_addr_t	*addr, **pprev;
2405da14cebeSEric Cheng 	aggr_grp_t		*grp = rx_group->arg_grp;
2406da14cebeSEric Cheng 	aggr_port_t		*port;
2407da14cebeSEric Cheng 	mac_perim_handle_t	mph;
2408da14cebeSEric Cheng 	int			err = 0;
2409da14cebeSEric Cheng 
2410da14cebeSEric Cheng 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
2411da14cebeSEric Cheng 
2412da14cebeSEric Cheng 	if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) {
2413da14cebeSEric Cheng 		mac_perim_exit(mph);
2414da14cebeSEric Cheng 		return (0);
2415da14cebeSEric Cheng 	}
2416da14cebeSEric Cheng 
2417da14cebeSEric Cheng 	/*
2418da14cebeSEric Cheng 	 * Insert this mac address into the list of mac addresses owned by
2419da14cebeSEric Cheng 	 * the aggregation pseudo group.
2420da14cebeSEric Cheng 	 */
2421da14cebeSEric Cheng 	pprev = &rx_group->arg_macaddr;
2422da14cebeSEric Cheng 	while ((addr = *pprev) != NULL) {
2423da14cebeSEric Cheng 		if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) != 0) {
2424da14cebeSEric Cheng 			pprev = &addr->aua_next;
2425da14cebeSEric Cheng 			continue;
2426da14cebeSEric Cheng 		}
2427da14cebeSEric Cheng 		break;
2428da14cebeSEric Cheng 	}
2429da14cebeSEric Cheng 	if (addr == NULL) {
2430da14cebeSEric Cheng 		mac_perim_exit(mph);
2431da14cebeSEric Cheng 		return (EINVAL);
2432da14cebeSEric Cheng 	}
2433da14cebeSEric Cheng 
2434da14cebeSEric Cheng 	for (port = grp->lg_ports; port != NULL; port = port->lp_next)
2435da14cebeSEric Cheng 		aggr_port_remmac(port, mac_addr);
2436da14cebeSEric Cheng 
2437da14cebeSEric Cheng 	*pprev = addr->aua_next;
2438da14cebeSEric Cheng 	kmem_free(addr, sizeof (aggr_unicst_addr_t));
2439da14cebeSEric Cheng 
2440da14cebeSEric Cheng 	mac_perim_exit(mph);
2441da14cebeSEric Cheng 	return (err);
2442da14cebeSEric Cheng }
2443da14cebeSEric Cheng 
2444da14cebeSEric Cheng /*
24457c478bd9Sstevel@tonic-gate  * Add or remove the multicast addresses that are defined for the group
24467c478bd9Sstevel@tonic-gate  * to or from the specified port.
2447ae6aa22aSVenugopal Iyer  *
2448ae6aa22aSVenugopal Iyer  * Note that aggr_grp_multicst_port(..., B_TRUE) is called when the port
2449ae6aa22aSVenugopal Iyer  * is started and attached, and aggr_grp_multicst_port(..., B_FALSE) is
2450ae6aa22aSVenugopal Iyer  * called when the port is either stopped or detached.
24517c478bd9Sstevel@tonic-gate  */
24527c478bd9Sstevel@tonic-gate void
aggr_grp_multicst_port(aggr_port_t * port,boolean_t add)24537c478bd9Sstevel@tonic-gate aggr_grp_multicst_port(aggr_port_t *port, boolean_t add)
24547c478bd9Sstevel@tonic-gate {
24557c478bd9Sstevel@tonic-gate 	aggr_grp_t *grp = port->lp_grp;
24567c478bd9Sstevel@tonic-gate 
2457da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
2458da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
24597c478bd9Sstevel@tonic-gate 
2460ae6aa22aSVenugopal Iyer 	if (!port->lp_started || port->lp_state != AGGR_PORT_STATE_ATTACHED)
24617c478bd9Sstevel@tonic-gate 		return;
24627c478bd9Sstevel@tonic-gate 
2463da14cebeSEric Cheng 	mac_multicast_refresh(grp->lg_mh, aggr_port_multicst, port, add);
24647c478bd9Sstevel@tonic-gate }
24657c478bd9Sstevel@tonic-gate 
24667c478bd9Sstevel@tonic-gate static int
aggr_m_multicst(void * arg,boolean_t add,const uint8_t * addrp)24677c478bd9Sstevel@tonic-gate aggr_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
24687c478bd9Sstevel@tonic-gate {
24697c478bd9Sstevel@tonic-gate 	aggr_grp_t *grp = arg;
24704a6df672SAnil udupa 	aggr_port_t *port = NULL, *errport = NULL;
2471da14cebeSEric Cheng 	mac_perim_handle_t mph;
24724a6df672SAnil udupa 	int err = 0;
24737c478bd9Sstevel@tonic-gate 
2474da14cebeSEric Cheng 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
24757c478bd9Sstevel@tonic-gate 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2476ae6aa22aSVenugopal Iyer 		if (port->lp_state != AGGR_PORT_STATE_ATTACHED ||
2477ae6aa22aSVenugopal Iyer 		    !port->lp_started) {
24787c478bd9Sstevel@tonic-gate 			continue;
2479ae6aa22aSVenugopal Iyer 		}
24804a6df672SAnil udupa 		err = aggr_port_multicst(port, add, addrp);
24814a6df672SAnil udupa 		if (err != 0) {
24824a6df672SAnil udupa 			errport = port;
24834a6df672SAnil udupa 			break;
24844a6df672SAnil udupa 		}
24854a6df672SAnil udupa 	}
24864a6df672SAnil udupa 
24874a6df672SAnil udupa 	/*
24884a6df672SAnil udupa 	 * At least one port caused error return and this error is returned to
24894a6df672SAnil udupa 	 * mac, eventually a NAK would be sent upwards.
24904a6df672SAnil udupa 	 * Some ports have this multicast address listed now, and some don't.
24914a6df672SAnil udupa 	 * Treat this error as a whole aggr failure not individual port failure.
24924a6df672SAnil udupa 	 * Therefore remove this multicast address from other ports.
24934a6df672SAnil udupa 	 */
24944a6df672SAnil udupa 	if ((err != 0) && add) {
24954a6df672SAnil udupa 		for (port = grp->lg_ports; port != errport;
24964a6df672SAnil udupa 		    port = port->lp_next) {
24974a6df672SAnil udupa 			if (port->lp_state != AGGR_PORT_STATE_ATTACHED ||
24984a6df672SAnil udupa 			    !port->lp_started) {
24994a6df672SAnil udupa 				continue;
25004a6df672SAnil udupa 			}
25014a6df672SAnil udupa 			(void) aggr_port_multicst(port, B_FALSE, addrp);
25024a6df672SAnil udupa 		}
25037c478bd9Sstevel@tonic-gate 	}
2504da14cebeSEric Cheng 	mac_perim_exit(mph);
25057c478bd9Sstevel@tonic-gate 	return (err);
25067c478bd9Sstevel@tonic-gate }
25077c478bd9Sstevel@tonic-gate 
25087c478bd9Sstevel@tonic-gate static int
aggr_m_unicst(void * arg,const uint8_t * macaddr)25097c478bd9Sstevel@tonic-gate aggr_m_unicst(void *arg, const uint8_t *macaddr)
25107c478bd9Sstevel@tonic-gate {
25117c478bd9Sstevel@tonic-gate 	aggr_grp_t *grp = arg;
2512da14cebeSEric Cheng 	mac_perim_handle_t mph;
2513da14cebeSEric Cheng 	int err;
25147c478bd9Sstevel@tonic-gate 
2515da14cebeSEric Cheng 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
2516da14cebeSEric Cheng 	err = aggr_grp_modify_common(grp, AGGR_MODIFY_MAC, 0, B_TRUE, macaddr,
25177c478bd9Sstevel@tonic-gate 	    0, 0);
2518da14cebeSEric Cheng 	mac_perim_exit(mph);
2519da14cebeSEric Cheng 	return (err);
25207c478bd9Sstevel@tonic-gate }
25217c478bd9Sstevel@tonic-gate 
25227c478bd9Sstevel@tonic-gate /*
25237c478bd9Sstevel@tonic-gate  * Initialize the capabilities that are advertised for the group
25247c478bd9Sstevel@tonic-gate  * according to the capabilities of the constituent ports.
25257c478bd9Sstevel@tonic-gate  */
25267c478bd9Sstevel@tonic-gate static void
aggr_grp_capab_set(aggr_grp_t * grp)25277c478bd9Sstevel@tonic-gate aggr_grp_capab_set(aggr_grp_t *grp)
25287c478bd9Sstevel@tonic-gate {
2529020da793Sseb 	uint32_t cksum;
25307c478bd9Sstevel@tonic-gate 	aggr_port_t *port;
253119c868a0SRoamer 	mac_capab_lso_t cap_lso;
25327c478bd9Sstevel@tonic-gate 
2533da14cebeSEric Cheng 	ASSERT(grp->lg_mh == NULL);
25347c478bd9Sstevel@tonic-gate 	ASSERT(grp->lg_ports != NULL);
2535ba2e4443Sseb 
2536ba2e4443Sseb 	grp->lg_hcksum_txflags = (uint32_t)-1;
2537d62bc4baSyz147064 	grp->lg_zcopy = B_TRUE;
2538d62bc4baSyz147064 	grp->lg_vlan = B_TRUE;
2539ba2e4443Sseb 
254019c868a0SRoamer 	grp->lg_lso = B_TRUE;
254119c868a0SRoamer 	grp->lg_cap_lso.lso_flags = (t_uscalar_t)-1;
254219c868a0SRoamer 	grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max = (t_uscalar_t)-1;
254319c868a0SRoamer 
25447c478bd9Sstevel@tonic-gate 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2545020da793Sseb 		if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &cksum))
2546020da793Sseb 			cksum = 0;
2547020da793Sseb 		grp->lg_hcksum_txflags &= cksum;
25487c478bd9Sstevel@tonic-gate 
2549d62bc4baSyz147064 		grp->lg_vlan &=
2550d62bc4baSyz147064 		    !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL);
2551d62bc4baSyz147064 
2552d62bc4baSyz147064 		grp->lg_zcopy &=
2553d62bc4baSyz147064 		    !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL);
255419c868a0SRoamer 
255519c868a0SRoamer 		grp->lg_lso &=
255619c868a0SRoamer 		    mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso);
255719c868a0SRoamer 		if (grp->lg_lso) {
255819c868a0SRoamer 			grp->lg_cap_lso.lso_flags &= cap_lso.lso_flags;
255919c868a0SRoamer 			if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max >
256019c868a0SRoamer 			    cap_lso.lso_basic_tcp_ipv4.lso_max)
256119c868a0SRoamer 				grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max =
256219c868a0SRoamer 				    cap_lso.lso_basic_tcp_ipv4.lso_max;
256319c868a0SRoamer 		}
25647c478bd9Sstevel@tonic-gate 	}
2565ba2e4443Sseb }
2566ba2e4443Sseb 
25677c478bd9Sstevel@tonic-gate /*
2568ba2e4443Sseb  * Checks whether the capabilities of the port being added are compatible
25697c478bd9Sstevel@tonic-gate  * with the current capabilities of the aggregation.
25707c478bd9Sstevel@tonic-gate  */
25717c478bd9Sstevel@tonic-gate static boolean_t
aggr_grp_capab_check(aggr_grp_t * grp,aggr_port_t * port)25727c478bd9Sstevel@tonic-gate aggr_grp_capab_check(aggr_grp_t *grp, aggr_port_t *port)
25737c478bd9Sstevel@tonic-gate {
2574ba2e4443Sseb 	uint32_t hcksum_txflags;
25757c478bd9Sstevel@tonic-gate 
25767c478bd9Sstevel@tonic-gate 	ASSERT(grp->lg_ports != NULL);
25777c478bd9Sstevel@tonic-gate 
2578d62bc4baSyz147064 	if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL)) &
2579d62bc4baSyz147064 	    grp->lg_vlan) != grp->lg_vlan) {
2580d62bc4baSyz147064 		return (B_FALSE);
2581d62bc4baSyz147064 	}
2582d62bc4baSyz147064 
2583d62bc4baSyz147064 	if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL)) &
2584d62bc4baSyz147064 	    grp->lg_zcopy) != grp->lg_zcopy) {
2585d62bc4baSyz147064 		return (B_FALSE);
2586d62bc4baSyz147064 	}
2587d62bc4baSyz147064 
2588ba2e4443Sseb 	if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &hcksum_txflags)) {
2589ba2e4443Sseb 		if (grp->lg_hcksum_txflags != 0)
2590ba2e4443Sseb 			return (B_FALSE);
2591ba2e4443Sseb 	} else if ((hcksum_txflags & grp->lg_hcksum_txflags) !=
2592ba2e4443Sseb 	    grp->lg_hcksum_txflags) {
2593ba2e4443Sseb 		return (B_FALSE);
2594ba2e4443Sseb 	}
2595ba2e4443Sseb 
259619c868a0SRoamer 	if (grp->lg_lso) {
259719c868a0SRoamer 		mac_capab_lso_t cap_lso;
259819c868a0SRoamer 
259919c868a0SRoamer 		if (mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso)) {
260019c868a0SRoamer 			if ((grp->lg_cap_lso.lso_flags & cap_lso.lso_flags) !=
260119c868a0SRoamer 			    grp->lg_cap_lso.lso_flags)
260219c868a0SRoamer 				return (B_FALSE);
260319c868a0SRoamer 			if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max >
260419c868a0SRoamer 			    cap_lso.lso_basic_tcp_ipv4.lso_max)
260519c868a0SRoamer 				return (B_FALSE);
260619c868a0SRoamer 		} else {
260719c868a0SRoamer 			return (B_FALSE);
260819c868a0SRoamer 		}
260919c868a0SRoamer 	}
261019c868a0SRoamer 
2611ba2e4443Sseb 	return (B_TRUE);
26127c478bd9Sstevel@tonic-gate }
2613f4420ae7Snd99603 
2614f4420ae7Snd99603 /*
2615f4420ae7Snd99603  * Returns the maximum SDU according to the SDU of the constituent ports.
2616f4420ae7Snd99603  */
2617f4420ae7Snd99603 static uint_t
aggr_grp_max_sdu(aggr_grp_t * grp)2618f4420ae7Snd99603 aggr_grp_max_sdu(aggr_grp_t *grp)
2619f4420ae7Snd99603 {
2620f4420ae7Snd99603 	uint_t max_sdu = (uint_t)-1;
2621f4420ae7Snd99603 	aggr_port_t *port;
2622f4420ae7Snd99603 
2623f4420ae7Snd99603 	ASSERT(grp->lg_ports != NULL);
2624f4420ae7Snd99603 
2625f4420ae7Snd99603 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2626e7801d59Ssowmini 		uint_t port_sdu_max;
2627e7801d59Ssowmini 
2628e7801d59Ssowmini 		mac_sdu_get(port->lp_mh, NULL, &port_sdu_max);
2629e7801d59Ssowmini 		if (max_sdu > port_sdu_max)
2630e7801d59Ssowmini 			max_sdu = port_sdu_max;
2631f4420ae7Snd99603 	}
2632f4420ae7Snd99603 
2633f4420ae7Snd99603 	return (max_sdu);
2634f4420ae7Snd99603 }
2635f4420ae7Snd99603 
2636f4420ae7Snd99603 /*
2637f4420ae7Snd99603  * Checks if the maximum SDU of the specified port is compatible
2638f4420ae7Snd99603  * with the maximum SDU of the specified aggregation group, returns
2639f4420ae7Snd99603  * B_TRUE if it is, B_FALSE otherwise.
2640f4420ae7Snd99603  */
2641f4420ae7Snd99603 static boolean_t
aggr_grp_sdu_check(aggr_grp_t * grp,aggr_port_t * port)2642f4420ae7Snd99603 aggr_grp_sdu_check(aggr_grp_t *grp, aggr_port_t *port)
2643f4420ae7Snd99603 {
2644e7801d59Ssowmini 	uint_t port_sdu_max;
2645f4420ae7Snd99603 
2646e7801d59Ssowmini 	mac_sdu_get(port->lp_mh, NULL, &port_sdu_max);
2647e7801d59Ssowmini 	return (port_sdu_max >= grp->lg_max_sdu);
2648f4420ae7Snd99603 }
2649d62bc4baSyz147064 
2650d62bc4baSyz147064 /*
2651d62bc4baSyz147064  * Returns the maximum margin according to the margin of the constituent ports.
2652d62bc4baSyz147064  */
2653d62bc4baSyz147064 static uint32_t
aggr_grp_max_margin(aggr_grp_t * grp)2654d62bc4baSyz147064 aggr_grp_max_margin(aggr_grp_t *grp)
2655d62bc4baSyz147064 {
2656d62bc4baSyz147064 	uint32_t margin = UINT32_MAX;
2657d62bc4baSyz147064 	aggr_port_t *port;
2658d62bc4baSyz147064 
2659da14cebeSEric Cheng 	ASSERT(grp->lg_mh == NULL);
2660d62bc4baSyz147064 	ASSERT(grp->lg_ports != NULL);
2661d62bc4baSyz147064 
2662d62bc4baSyz147064 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2663d62bc4baSyz147064 		if (margin > port->lp_margin)
2664d62bc4baSyz147064 			margin = port->lp_margin;
2665d62bc4baSyz147064 	}
2666d62bc4baSyz147064 
2667d62bc4baSyz147064 	grp->lg_margin = margin;
2668d62bc4baSyz147064 	return (margin);
2669d62bc4baSyz147064 }
2670d62bc4baSyz147064 
2671d62bc4baSyz147064 /*
2672d62bc4baSyz147064  * Checks if the maximum margin of the specified port is compatible
2673d62bc4baSyz147064  * with the maximum margin of the specified aggregation group, returns
2674d62bc4baSyz147064  * B_TRUE if it is, B_FALSE otherwise.
2675d62bc4baSyz147064  */
2676d62bc4baSyz147064 static boolean_t
aggr_grp_margin_check(aggr_grp_t * grp,aggr_port_t * port)2677d62bc4baSyz147064 aggr_grp_margin_check(aggr_grp_t *grp, aggr_port_t *port)
2678d62bc4baSyz147064 {
2679d62bc4baSyz147064 	if (port->lp_margin >= grp->lg_margin)
2680d62bc4baSyz147064 		return (B_TRUE);
2681d62bc4baSyz147064 
2682d62bc4baSyz147064 	/*
2683d62bc4baSyz147064 	 * See whether the current margin value is allowed to be changed to
2684d62bc4baSyz147064 	 * the new value.
2685d62bc4baSyz147064 	 */
2686d62bc4baSyz147064 	if (!mac_margin_update(grp->lg_mh, port->lp_margin))
2687d62bc4baSyz147064 		return (B_FALSE);
2688d62bc4baSyz147064 
2689d62bc4baSyz147064 	grp->lg_margin = port->lp_margin;
2690d62bc4baSyz147064 	return (B_TRUE);
2691d62bc4baSyz147064 }
2692986cab2cSGirish Moodalbail 
2693986cab2cSGirish Moodalbail /*
2694986cab2cSGirish Moodalbail  * Set MTU on individual ports of an aggregation group
2695986cab2cSGirish Moodalbail  */
2696986cab2cSGirish Moodalbail static int
aggr_set_port_sdu(aggr_grp_t * grp,aggr_port_t * port,uint32_t sdu,uint32_t * old_mtu)2697986cab2cSGirish Moodalbail aggr_set_port_sdu(aggr_grp_t *grp, aggr_port_t *port, uint32_t sdu,
2698986cab2cSGirish Moodalbail     uint32_t *old_mtu)
2699986cab2cSGirish Moodalbail {
2700986cab2cSGirish Moodalbail 	boolean_t 		removed = B_FALSE;
2701986cab2cSGirish Moodalbail 	mac_perim_handle_t	mph;
2702986cab2cSGirish Moodalbail 	mac_diag_t		diag;
2703986cab2cSGirish Moodalbail 	int			err, rv, retry = 0;
2704986cab2cSGirish Moodalbail 
2705986cab2cSGirish Moodalbail 	if (port->lp_mah != NULL) {
2706986cab2cSGirish Moodalbail 		(void) mac_unicast_remove(port->lp_mch, port->lp_mah);
2707986cab2cSGirish Moodalbail 		port->lp_mah = NULL;
2708986cab2cSGirish Moodalbail 		removed = B_TRUE;
2709986cab2cSGirish Moodalbail 	}
2710986cab2cSGirish Moodalbail 	err = mac_set_mtu(port->lp_mh, sdu, old_mtu);
2711986cab2cSGirish Moodalbail try_again:
27124c91d6c6SVenugopal Iyer 	if (removed && (rv = mac_unicast_add(port->lp_mch, NULL,
27134c91d6c6SVenugopal Iyer 	    MAC_UNICAST_PRIMARY | MAC_UNICAST_DISABLE_TX_VID_CHECK,
27144c91d6c6SVenugopal Iyer 	    &port->lp_mah, 0, &diag)) != 0) {
2715986cab2cSGirish Moodalbail 		/*
2716986cab2cSGirish Moodalbail 		 * following is a workaround for a bug in 'bge' driver.
2717986cab2cSGirish Moodalbail 		 * See CR 6794654 for more information and this work around
2718986cab2cSGirish Moodalbail 		 * will be removed once the CR is fixed.
2719986cab2cSGirish Moodalbail 		 */
2720986cab2cSGirish Moodalbail 		if (rv == EIO && retry++ < 3) {
2721986cab2cSGirish Moodalbail 			delay(2 * hz);
2722986cab2cSGirish Moodalbail 			goto try_again;
2723986cab2cSGirish Moodalbail 		}
2724986cab2cSGirish Moodalbail 		/*
27254c91d6c6SVenugopal Iyer 		 * if mac_unicast_add() failed while setting the MTU,
2726986cab2cSGirish Moodalbail 		 * detach the port from the group.
2727986cab2cSGirish Moodalbail 		 */
2728986cab2cSGirish Moodalbail 		mac_perim_enter_by_mh(port->lp_mh, &mph);
2729986cab2cSGirish Moodalbail 		(void) aggr_grp_detach_port(grp, port);
2730986cab2cSGirish Moodalbail 		mac_perim_exit(mph);
2731986cab2cSGirish Moodalbail 		cmn_err(CE_WARN, "Unable to restart the port %s while "
2732986cab2cSGirish Moodalbail 		    "setting MTU. Detaching the port from the aggregation.",
2733986cab2cSGirish Moodalbail 		    mac_client_name(port->lp_mch));
2734986cab2cSGirish Moodalbail 	}
2735986cab2cSGirish Moodalbail 	return (err);
2736986cab2cSGirish Moodalbail }
2737986cab2cSGirish Moodalbail 
2738986cab2cSGirish Moodalbail static int
aggr_sdu_update(aggr_grp_t * grp,uint32_t sdu)2739986cab2cSGirish Moodalbail aggr_sdu_update(aggr_grp_t *grp, uint32_t sdu)
2740986cab2cSGirish Moodalbail {
2741986cab2cSGirish Moodalbail 	int			err = 0, i, rv;
2742986cab2cSGirish Moodalbail 	aggr_port_t		*port;
2743986cab2cSGirish Moodalbail 	uint32_t		*mtu;
2744986cab2cSGirish Moodalbail 
2745986cab2cSGirish Moodalbail 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
2746986cab2cSGirish Moodalbail 
2747986cab2cSGirish Moodalbail 	/*
2748986cab2cSGirish Moodalbail 	 * If the MTU being set is equal to aggr group's maximum
2749986cab2cSGirish Moodalbail 	 * allowable value, then there is nothing to change
2750986cab2cSGirish Moodalbail 	 */
2751986cab2cSGirish Moodalbail 	if (sdu == grp->lg_max_sdu)
2752986cab2cSGirish Moodalbail 		return (0);
2753986cab2cSGirish Moodalbail 
2754986cab2cSGirish Moodalbail 	/* 0 is aggr group's min sdu */
2755986cab2cSGirish Moodalbail 	if (sdu == 0)
2756986cab2cSGirish Moodalbail 		return (EINVAL);
2757986cab2cSGirish Moodalbail 
2758986cab2cSGirish Moodalbail 	mtu = kmem_alloc(sizeof (uint32_t) * grp->lg_nports, KM_SLEEP);
2759986cab2cSGirish Moodalbail 	for (port = grp->lg_ports, i = 0; port != NULL && err == 0;
2760986cab2cSGirish Moodalbail 	    port = port->lp_next, i++) {
2761986cab2cSGirish Moodalbail 		err = aggr_set_port_sdu(grp, port, sdu, mtu + i);
2762986cab2cSGirish Moodalbail 	}
2763986cab2cSGirish Moodalbail 	if (err != 0) {
2764986cab2cSGirish Moodalbail 		/* recover from error: reset the mtus of the ports */
2765986cab2cSGirish Moodalbail 		aggr_port_t *tmp;
2766986cab2cSGirish Moodalbail 
2767986cab2cSGirish Moodalbail 		for (tmp = grp->lg_ports, i = 0; tmp != port;
2768986cab2cSGirish Moodalbail 		    tmp = tmp->lp_next, i++) {
2769986cab2cSGirish Moodalbail 			(void) aggr_set_port_sdu(grp, tmp, *(mtu + i), NULL);
2770986cab2cSGirish Moodalbail 		}
2771986cab2cSGirish Moodalbail 		goto bail;
2772986cab2cSGirish Moodalbail 	}
2773986cab2cSGirish Moodalbail 	grp->lg_max_sdu = aggr_grp_max_sdu(grp);
2774986cab2cSGirish Moodalbail 	rv = mac_maxsdu_update(grp->lg_mh, grp->lg_max_sdu);
2775986cab2cSGirish Moodalbail 	ASSERT(rv == 0);
2776986cab2cSGirish Moodalbail bail:
2777986cab2cSGirish Moodalbail 	kmem_free(mtu, sizeof (uint32_t) * grp->lg_nports);
2778986cab2cSGirish Moodalbail 	return (err);
2779986cab2cSGirish Moodalbail }
2780986cab2cSGirish Moodalbail 
2781986cab2cSGirish Moodalbail /*
2782986cab2cSGirish Moodalbail  * Callback functions for set/get of properties
2783986cab2cSGirish Moodalbail  */
2784986cab2cSGirish Moodalbail /*ARGSUSED*/
2785986cab2cSGirish Moodalbail static int
aggr_m_setprop(void * m_driver,const char * pr_name,mac_prop_id_t pr_num,uint_t pr_valsize,const void * pr_val)2786986cab2cSGirish Moodalbail aggr_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
2787986cab2cSGirish Moodalbail     uint_t pr_valsize, const void *pr_val)
2788986cab2cSGirish Moodalbail {
2789986cab2cSGirish Moodalbail 	int 		err = ENOTSUP;
2790986cab2cSGirish Moodalbail 	aggr_grp_t 	*grp = m_driver;
2791986cab2cSGirish Moodalbail 
2792986cab2cSGirish Moodalbail 	switch (pr_num) {
2793986cab2cSGirish Moodalbail 	case MAC_PROP_MTU: {
2794986cab2cSGirish Moodalbail 		uint32_t 	mtu;
2795986cab2cSGirish Moodalbail 
2796986cab2cSGirish Moodalbail 		if (pr_valsize < sizeof (mtu)) {
2797986cab2cSGirish Moodalbail 			err = EINVAL;
2798986cab2cSGirish Moodalbail 			break;
2799986cab2cSGirish Moodalbail 		}
2800986cab2cSGirish Moodalbail 		bcopy(pr_val, &mtu, sizeof (mtu));
2801986cab2cSGirish Moodalbail 		err = aggr_sdu_update(grp, mtu);
2802986cab2cSGirish Moodalbail 		break;
2803986cab2cSGirish Moodalbail 	}
2804986cab2cSGirish Moodalbail 	default:
2805986cab2cSGirish Moodalbail 		break;
2806986cab2cSGirish Moodalbail 	}
2807986cab2cSGirish Moodalbail 	return (err);
2808986cab2cSGirish Moodalbail }
2809986cab2cSGirish Moodalbail 
/*
 * One boundary (either end) of an MTU range, as collected and sorted by
 * aggr_mtu_range_intersection().
 */
typedef struct rboundary {
	uint32_t	bval;	/* value of the boundary */
	int		btype;	/* +1 for a range minimum, -1 for a maximum */
} rboundary_t;
28140591ddd0SPrakash Jalan 
28150591ddd0SPrakash Jalan /*
28160591ddd0SPrakash Jalan  * This function finds the intersection of mtu ranges stored in arrays -
28170591ddd0SPrakash Jalan  * mrange[0] ... mrange[mcount -1]. It returns the intersection in rval.
28180591ddd0SPrakash Jalan  * Individual arrays are assumed to contain non-overlapping ranges.
28190591ddd0SPrakash Jalan  * Algorithm:
28200591ddd0SPrakash Jalan  *   A range has two boundaries - min and max. We scan all arrays and store
28210591ddd0SPrakash Jalan  * each boundary as a separate element in a temporary array. We also store
28220591ddd0SPrakash Jalan  * the boundary types, min or max, as +1 or -1 respectively in the temporary
28230591ddd0SPrakash Jalan  * array. Then we sort the temporary array in ascending order. We scan the
28240591ddd0SPrakash Jalan  * sorted array from lower to higher values and keep a cumulative sum of
28250591ddd0SPrakash Jalan  * boundary types. Element in the temporary array for which the sum reaches
28260591ddd0SPrakash Jalan  * mcount is a min boundary of a range in the result and next element will be
28270591ddd0SPrakash Jalan  * max boundary.
28280591ddd0SPrakash Jalan  *
28290591ddd0SPrakash Jalan  * Example for mcount = 3,
28300591ddd0SPrakash Jalan  *
28310591ddd0SPrakash Jalan  *  ----|_________|-------|_______|----|__|------ mrange[0]
28320591ddd0SPrakash Jalan  *
28330591ddd0SPrakash Jalan  *  -------|________|--|____________|-----|___|-- mrange[1]
28340591ddd0SPrakash Jalan  *
28350591ddd0SPrakash Jalan  *  --------|________________|-------|____|------ mrange[2]
28360591ddd0SPrakash Jalan  *
28370591ddd0SPrakash Jalan  *                                      3 2 1
28380591ddd0SPrakash Jalan  *                                       \|/
28390591ddd0SPrakash Jalan  *      1  23     2 1  2  3  2    1 01 2  V   0  <- the sum
28400591ddd0SPrakash Jalan  *  ----|--||-----|-|--|--|--|----|-||-|--|---|-- sorted array
28410591ddd0SPrakash Jalan  *
28420591ddd0SPrakash Jalan  *                                 same min and max
28430591ddd0SPrakash Jalan  *                                        V
28440591ddd0SPrakash Jalan  *  --------|_____|-------|__|------------|------ intersecting ranges
28450591ddd0SPrakash Jalan  */
28460591ddd0SPrakash Jalan void
aggr_mtu_range_intersection(mac_propval_range_t ** mrange,int mcount,mac_propval_uint32_range_t ** prval,int * prmaxcnt,int * prcount)28470591ddd0SPrakash Jalan aggr_mtu_range_intersection(mac_propval_range_t **mrange, int mcount,
28480591ddd0SPrakash Jalan     mac_propval_uint32_range_t **prval, int *prmaxcnt, int *prcount)
2849f0f2c3a5SGirish Moodalbail {
28500591ddd0SPrakash Jalan 	mac_propval_uint32_range_t	*rval, *ur;
28510591ddd0SPrakash Jalan 	int				rmaxcnt, rcount;
28520591ddd0SPrakash Jalan 	size_t				sz_range32;
28530591ddd0SPrakash Jalan 	rboundary_t			*ta; /* temporary array */
28540591ddd0SPrakash Jalan 	rboundary_t			temp;
28550591ddd0SPrakash Jalan 	boolean_t			range_started = B_FALSE;
28560591ddd0SPrakash Jalan 	int				i, j, m, sum;
28570591ddd0SPrakash Jalan 
28580591ddd0SPrakash Jalan 	sz_range32 = sizeof (mac_propval_uint32_range_t);
28590591ddd0SPrakash Jalan 
28600591ddd0SPrakash Jalan 	for (i = 0, rmaxcnt = 0; i < mcount; i++)
28610591ddd0SPrakash Jalan 		rmaxcnt += mrange[i]->mpr_count;
28620591ddd0SPrakash Jalan 
28630591ddd0SPrakash Jalan 	/* Allocate enough space to store the results */
28640591ddd0SPrakash Jalan 	rval = kmem_alloc(rmaxcnt * sz_range32, KM_SLEEP);
28650591ddd0SPrakash Jalan 
28660591ddd0SPrakash Jalan 	/* Number of boundaries are twice as many as ranges */
28670591ddd0SPrakash Jalan 	ta = kmem_alloc(2 * rmaxcnt * sizeof (rboundary_t), KM_SLEEP);
28680591ddd0SPrakash Jalan 
28690591ddd0SPrakash Jalan 	for (i = 0, m = 0; i < mcount; i++) {
28700591ddd0SPrakash Jalan 		ur = &(mrange[i]->mpr_range_uint32[0]);
28710591ddd0SPrakash Jalan 		for (j = 0; j < mrange[i]->mpr_count; j++) {
28720591ddd0SPrakash Jalan 			ta[m].bval = ur[j].mpur_min;
28730591ddd0SPrakash Jalan 			ta[m++].btype = 1;
28740591ddd0SPrakash Jalan 			ta[m].bval = ur[j].mpur_max;
28750591ddd0SPrakash Jalan 			ta[m++].btype = -1;
28760591ddd0SPrakash Jalan 		}
28770591ddd0SPrakash Jalan 	}
28780591ddd0SPrakash Jalan 
28790591ddd0SPrakash Jalan 	/*
28800591ddd0SPrakash Jalan 	 * Sort the temporary array in ascending order of bval;
28810591ddd0SPrakash Jalan 	 * if boundary values are same then sort on btype.
28820591ddd0SPrakash Jalan 	 */
28830591ddd0SPrakash Jalan 	for (i = 0; i < m-1; i++) {
28840591ddd0SPrakash Jalan 		for (j = i+1; j < m; j++) {
28850591ddd0SPrakash Jalan 			if ((ta[i].bval > ta[j].bval) ||
28860591ddd0SPrakash Jalan 			    ((ta[i].bval == ta[j].bval) &&
28870591ddd0SPrakash Jalan 			    (ta[i].btype < ta[j].btype))) {
28880591ddd0SPrakash Jalan 				temp = ta[i];
28890591ddd0SPrakash Jalan 				ta[i] = ta[j];
28900591ddd0SPrakash Jalan 				ta[j] = temp;
28910591ddd0SPrakash Jalan 			}
28920591ddd0SPrakash Jalan 		}
28930591ddd0SPrakash Jalan 	}
28940591ddd0SPrakash Jalan 
28950591ddd0SPrakash Jalan 	/* Walk through temporary array to find all ranges in the results */
28960591ddd0SPrakash Jalan 	for (i = 0, sum = 0, rcount = 0; i < m; i++) {
28970591ddd0SPrakash Jalan 		sum += ta[i].btype;
28980591ddd0SPrakash Jalan 		if (sum == mcount) {
28990591ddd0SPrakash Jalan 			rval[rcount].mpur_min = ta[i].bval;
29000591ddd0SPrakash Jalan 			range_started = B_TRUE;
29010591ddd0SPrakash Jalan 		} else if (sum < mcount && range_started) {
29020591ddd0SPrakash Jalan 			rval[rcount++].mpur_max = ta[i].bval;
29030591ddd0SPrakash Jalan 			range_started = B_FALSE;
29040591ddd0SPrakash Jalan 		}
29050591ddd0SPrakash Jalan 	}
29060591ddd0SPrakash Jalan 
29070591ddd0SPrakash Jalan 	*prval = rval;
29080591ddd0SPrakash Jalan 	*prmaxcnt = rmaxcnt;
29090591ddd0SPrakash Jalan 	*prcount = rcount;
291084191983SPrakash Jalan 
291184191983SPrakash Jalan 	kmem_free(ta, 2 * rmaxcnt * sizeof (rboundary_t));
29120591ddd0SPrakash Jalan }
29130591ddd0SPrakash Jalan 
29140591ddd0SPrakash Jalan /*
29150591ddd0SPrakash Jalan  * Returns the mtu ranges which could be supported by aggr group.
29160591ddd0SPrakash Jalan  * prmaxcnt returns the size of the buffer prval, prcount returns
29170591ddd0SPrakash Jalan  * the number of valid entries in prval. Caller is responsible
29180591ddd0SPrakash Jalan  * for freeing up prval.
29190591ddd0SPrakash Jalan  */
29200591ddd0SPrakash Jalan int
aggr_grp_possible_mtu_range(aggr_grp_t * grp,mac_propval_uint32_range_t ** prval,int * prmaxcnt,int * prcount)29210591ddd0SPrakash Jalan aggr_grp_possible_mtu_range(aggr_grp_t *grp, mac_propval_uint32_range_t **prval,
29220591ddd0SPrakash Jalan     int *prmaxcnt, int *prcount)
29230591ddd0SPrakash Jalan {
29240591ddd0SPrakash Jalan 	mac_propval_range_t		**vals;
2925f0f2c3a5SGirish Moodalbail 	aggr_port_t			*port;
2926f0f2c3a5SGirish Moodalbail 	mac_perim_handle_t		mph;
29270591ddd0SPrakash Jalan 	uint_t 				i, numr;
2928f0f2c3a5SGirish Moodalbail 	int 				err = 0;
29290591ddd0SPrakash Jalan 	size_t				sz_propval, sz_range32;
29300591ddd0SPrakash Jalan 	size_t				size;
29310591ddd0SPrakash Jalan 
29320591ddd0SPrakash Jalan 	sz_propval = sizeof (mac_propval_range_t);
29330591ddd0SPrakash Jalan 	sz_range32 = sizeof (mac_propval_uint32_range_t);
2934f0f2c3a5SGirish Moodalbail 
2935f0f2c3a5SGirish Moodalbail 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
2936f0f2c3a5SGirish Moodalbail 
29370591ddd0SPrakash Jalan 	vals = kmem_zalloc(sizeof (mac_propval_range_t *) * grp->lg_nports,
2938f0f2c3a5SGirish Moodalbail 	    KM_SLEEP);
2939f0f2c3a5SGirish Moodalbail 
2940f0f2c3a5SGirish Moodalbail 	for (port = grp->lg_ports, i = 0; port != NULL;
2941f0f2c3a5SGirish Moodalbail 	    port = port->lp_next, i++) {
29420591ddd0SPrakash Jalan 
29430591ddd0SPrakash Jalan 		size = sz_propval;
29440591ddd0SPrakash Jalan 		vals[i] = kmem_alloc(size, KM_SLEEP);
29450591ddd0SPrakash Jalan 		vals[i]->mpr_count = 1;
29460591ddd0SPrakash Jalan 
2947f0f2c3a5SGirish Moodalbail 		mac_perim_enter_by_mh(port->lp_mh, &mph);
29480591ddd0SPrakash Jalan 
29490dc2366fSVenugopal Iyer 		err = mac_prop_info(port->lp_mh, MAC_PROP_MTU, NULL,
29500591ddd0SPrakash Jalan 		    NULL, 0, vals[i], NULL);
29510591ddd0SPrakash Jalan 		if (err == ENOSPC) {
29520591ddd0SPrakash Jalan 			/*
29530591ddd0SPrakash Jalan 			 * Not enough space to hold all ranges.
29540591ddd0SPrakash Jalan 			 * Allocate extra space as indicated and retry.
29550591ddd0SPrakash Jalan 			 */
29560591ddd0SPrakash Jalan 			numr = vals[i]->mpr_count;
29570591ddd0SPrakash Jalan 			kmem_free(vals[i], sz_propval);
29580591ddd0SPrakash Jalan 			size = sz_propval + (numr - 1) * sz_range32;
29590591ddd0SPrakash Jalan 			vals[i] = kmem_alloc(size, KM_SLEEP);
29600591ddd0SPrakash Jalan 			vals[i]->mpr_count = numr;
29610591ddd0SPrakash Jalan 			err = mac_prop_info(port->lp_mh, MAC_PROP_MTU, NULL,
29620591ddd0SPrakash Jalan 			    NULL, 0, vals[i], NULL);
29630591ddd0SPrakash Jalan 			ASSERT(err != ENOSPC);
29640591ddd0SPrakash Jalan 		}
2965f0f2c3a5SGirish Moodalbail 		mac_perim_exit(mph);
29660591ddd0SPrakash Jalan 		if (err != 0) {
29670591ddd0SPrakash Jalan 			kmem_free(vals[i], size);
29680591ddd0SPrakash Jalan 			vals[i] = NULL;
2969f0f2c3a5SGirish Moodalbail 			break;
2970f0f2c3a5SGirish Moodalbail 		}
29710591ddd0SPrakash Jalan 	}
29720dc2366fSVenugopal Iyer 
2973f0f2c3a5SGirish Moodalbail 	/*
2974f0f2c3a5SGirish Moodalbail 	 * if any of the underlying ports does not support changing MTU then
2975f0f2c3a5SGirish Moodalbail 	 * just return ENOTSUP
2976f0f2c3a5SGirish Moodalbail 	 */
2977f0f2c3a5SGirish Moodalbail 	if (port != NULL) {
2978f0f2c3a5SGirish Moodalbail 		ASSERT(err != 0);
2979f0f2c3a5SGirish Moodalbail 		goto done;
2980f0f2c3a5SGirish Moodalbail 	}
29810dc2366fSVenugopal Iyer 
29820591ddd0SPrakash Jalan 	aggr_mtu_range_intersection(vals, grp->lg_nports, prval, prmaxcnt,
29830591ddd0SPrakash Jalan 	    prcount);
29840dc2366fSVenugopal Iyer 
29850591ddd0SPrakash Jalan done:
29860591ddd0SPrakash Jalan 	for (i = 0; i < grp->lg_nports; i++) {
29870591ddd0SPrakash Jalan 		if (vals[i] != NULL) {
29880591ddd0SPrakash Jalan 			numr = vals[i]->mpr_count;
29890591ddd0SPrakash Jalan 			size = sz_propval + (numr - 1) * sz_range32;
29900591ddd0SPrakash Jalan 			kmem_free(vals[i], size);
29910591ddd0SPrakash Jalan 		}
29920591ddd0SPrakash Jalan 	}
29930591ddd0SPrakash Jalan 
29940591ddd0SPrakash Jalan 	kmem_free(vals, sizeof (mac_propval_range_t *) * grp->lg_nports);
2995f0f2c3a5SGirish Moodalbail 	return (err);
2996f0f2c3a5SGirish Moodalbail }
2997f0f2c3a5SGirish Moodalbail 
29980dc2366fSVenugopal Iyer static void
aggr_m_propinfo(void * m_driver,const char * pr_name,mac_prop_id_t pr_num,mac_prop_info_handle_t prh)29990dc2366fSVenugopal Iyer aggr_m_propinfo(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
30000dc2366fSVenugopal Iyer     mac_prop_info_handle_t prh)
3001986cab2cSGirish Moodalbail {
3002f0f2c3a5SGirish Moodalbail 	aggr_grp_t			*grp = m_driver;
30030591ddd0SPrakash Jalan 	mac_propval_uint32_range_t	*rval = NULL;
30040591ddd0SPrakash Jalan 	int				i, rcount, rmaxcnt;
30050591ddd0SPrakash Jalan 	int				err = 0;
3006f0f2c3a5SGirish Moodalbail 
30070dc2366fSVenugopal Iyer 	_NOTE(ARGUNUSED(pr_name));
30080dc2366fSVenugopal Iyer 
3009f0f2c3a5SGirish Moodalbail 	switch (pr_num) {
30100591ddd0SPrakash Jalan 	case MAC_PROP_MTU:
30110dc2366fSVenugopal Iyer 
30120591ddd0SPrakash Jalan 		err = aggr_grp_possible_mtu_range(grp, &rval, &rmaxcnt,
30130591ddd0SPrakash Jalan 		    &rcount);
30140591ddd0SPrakash Jalan 		if (err != 0) {
30150591ddd0SPrakash Jalan 			ASSERT(rval == NULL);
30160dc2366fSVenugopal Iyer 			return;
3017f0f2c3a5SGirish Moodalbail 		}
30180591ddd0SPrakash Jalan 		for (i = 0; i < rcount; i++) {
30190591ddd0SPrakash Jalan 			mac_prop_info_set_range_uint32(prh,
30200591ddd0SPrakash Jalan 			    rval[i].mpur_min, rval[i].mpur_max);
30210591ddd0SPrakash Jalan 		}
30220591ddd0SPrakash Jalan 		kmem_free(rval, sizeof (mac_propval_uint32_range_t) * rmaxcnt);
30230591ddd0SPrakash Jalan 		break;
30240dc2366fSVenugopal Iyer 	}
3025986cab2cSGirish Moodalbail }
3026