xref: /illumos-gate/usr/src/uts/common/io/aggr/aggr_grp.c (revision 66582b606a8194f7f3ba5b3a3a6dca5b0d346361)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2017, Joyent, Inc.
24  */
25 
26 /*
27  * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups.
28  *
29  * An instance of the structure aggr_grp_t is allocated for each
30  * link aggregation group. When created, aggr_grp_t objects are
31  * entered into the aggr_grp_hash hash table maintained by the modhash
32  * module. The hash key is the linkid associated with the link
33  * aggregation group.
34  *
35  * A set of MAC ports is associated with each link aggregation group.
36  *
37  * Aggr pseudo TX rings
38  * --------------------
39  * The underlying ports (NICs) in an aggregation can have TX rings. To
40  * enhance aggr's performance, these TX rings are made available to the
41  * aggr layer as pseudo TX rings. The concept of pseudo rings is not new:
42  * they are already implemented on the RX side, where they are called
43  * pseudo RX rings. The same concept is extended to the TX side where
44  * each TX ring of an underlying port is reflected in aggr as a pseudo
45  * TX ring. Thus each pseudo TX ring will map to a specific hardware TX
46  * ring. Even in the case of a NIC that does not have a TX ring, a pseudo
47  * TX ring is given to the aggregation layer.
48  *
49  * With this change, the outgoing stack depth looks much better:
50  *
51  * mac_tx() -> mac_tx_aggr_mode() -> mac_tx_soft_ring_process() ->
52  * mac_tx_send() -> aggr_ring_tx() -> <driver>_ring_tx()
53  *
54  * Two new modes are introduced to mac_tx() to handle aggr pseudo TX rings:
55  * SRS_TX_AGGR and SRS_TX_BW_AGGR.
56  *
57  * In SRS_TX_AGGR mode, mac_tx_aggr_mode() routine is called. This routine
58  * invokes an aggr function, aggr_find_tx_ring(), to find a (pseudo) TX
59  * ring belonging to a port on which the packet has to be sent.
60  * aggr_find_tx_ring() first finds the outgoing port based on L2/L3/L4
61  * policy and then uses the fanout_hint passed to it to pick a TX ring from
62  * the selected port.
63  *
64  * In SRS_TX_BW_AGGR mode, mac_tx_bw_mode() function is called where
65  * bandwidth limit is applied first on the outgoing packet and the packets
66  * allowed to go out would call mac_tx_aggr_mode() to send the packet on a
67  * particular TX ring.
68  */
69 
70 #include <sys/types.h>
71 #include <sys/sysmacros.h>
72 #include <sys/conf.h>
73 #include <sys/cmn_err.h>
74 #include <sys/disp.h>
75 #include <sys/list.h>
76 #include <sys/ksynch.h>
77 #include <sys/kmem.h>
78 #include <sys/stream.h>
79 #include <sys/modctl.h>
80 #include <sys/ddi.h>
81 #include <sys/sunddi.h>
82 #include <sys/atomic.h>
83 #include <sys/stat.h>
84 #include <sys/modhash.h>
85 #include <sys/id_space.h>
86 #include <sys/strsun.h>
87 #include <sys/cred.h>
88 #include <sys/dlpi.h>
89 #include <sys/zone.h>
90 #include <sys/mac_provider.h>
91 #include <sys/dls.h>
92 #include <sys/vlan.h>
93 #include <sys/aggr.h>
94 #include <sys/aggr_impl.h>
95 
/*
 * MAC-facing entry points implemented by this driver. Most of them are
 * installed in the aggr_m_callbacks table defined below.
 */
static int aggr_m_start(void *);
static void aggr_m_stop(void *);
static int aggr_m_promisc(void *, boolean_t);
static int aggr_m_multicst(void *, boolean_t, const uint8_t *);
static int aggr_m_unicst(void *, const uint8_t *);
static int aggr_m_stat(void *, uint_t, uint64_t *);
static void aggr_m_ioctl(void *, queue_t *, mblk_t *);
static boolean_t aggr_m_capab_get(void *, mac_capab_t, void *);
static int aggr_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
    const void *);
static void aggr_m_propinfo(void *, const char *, mac_prop_id_t,
    mac_prop_info_handle_t);

/* Forward declarations: per-group port lookup and removal. */
static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, datalink_id_t);
static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *,
    boolean_t *);

/* Forward declarations: group capability, SDU and margin handling. */
static void aggr_grp_capab_set(aggr_grp_t *);
static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *);
static uint_t aggr_grp_max_sdu(aggr_grp_t *);
static uint32_t aggr_grp_max_margin(aggr_grp_t *);
static boolean_t aggr_grp_sdu_check(aggr_grp_t *, aggr_port_t *);
static boolean_t aggr_grp_margin_check(aggr_grp_t *, aggr_port_t *);

/*
 * Forward declarations: pseudo RX group handling and the ring/group
 * related entry points (interrupt control, ring start, address add/remove,
 * polling, ring and group fill routines).
 */
static int aggr_add_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
static void aggr_rem_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
static int aggr_pseudo_disable_intr(mac_intr_handle_t);
static int aggr_pseudo_enable_intr(mac_intr_handle_t);
static int aggr_pseudo_start_ring(mac_ring_driver_t, uint64_t);
static int aggr_addmac(void *, const uint8_t *);
static int aggr_remmac(void *, const uint8_t *);
static mblk_t *aggr_rx_poll(void *, int);
static void aggr_fill_ring(void *, mac_ring_type_t, const int,
    const int, mac_ring_info_t *, mac_ring_handle_t);
static void aggr_fill_group(void *, mac_ring_type_t, const int,
    mac_group_info_t *, mac_group_handle_t);
/*
 * File-scope state, set up by aggr_grp_init() and torn down by
 * aggr_grp_fini():
 *   aggr_grp_cache	kmem cache of aggr_grp_t objects
 *   aggr_grp_hash	id hash of the groups, keyed by linkid
 *   aggr_grp_lock	reader/writer lock; aggr_grp_count() takes it as
 *			reader to sample aggr_grp_cnt
 *   aggr_grp_cnt	group counter, reset to 0 by aggr_grp_init()
 *   key_ids		id space for keys in (AGGR_MAX_KEY + 1)..UINT16_MAX
 */
static kmem_cache_t	*aggr_grp_cache;
static mod_hash_t	*aggr_grp_hash;
static krwlock_t	aggr_grp_lock;
static uint_t		aggr_grp_cnt;
static id_space_t	*key_ids;

/* Size handed to mod_hash_create_idhash() for aggr_grp_hash. */
#define	GRP_HASHSZ		64
/* Convert a linkid into the key type expected by the modhash routines. */
#define	GRP_HASH_KEY(linkid)	((mod_hash_key_t)(uintptr_t)linkid)
/* NOTE(review): not referenced in the visible part of the file. */
#define	AGGR_PORT_NAME_DELIMIT '-'

/*
 * All-zero 6 byte address. NOTE(review): presumably used to recognize an
 * unset MAC address; its users are outside the visible part of the file.
 */
static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0};
144 
/*
 * Flags placed in the first slot of aggr_m_callbacks. NOTE(review): each
 * flag appears to correspond to one of the optional entries supplied in
 * the table below (ioctl, getcapab, setprop, propinfo) -- confirm against
 * the mac_callbacks_t definition.
 */
#define	AGGR_M_CALLBACK_FLAGS	\
	(MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO)

/*
 * MAC callbacks table of the aggregation. The initializer is positional,
 * so the order of the entries, including the NULL placeholders for the
 * callbacks that are not supplied here, must match the field order of
 * mac_callbacks_t.
 */
static mac_callbacks_t aggr_m_callbacks = {
	AGGR_M_CALLBACK_FLAGS,
	aggr_m_stat,
	aggr_m_start,
	aggr_m_stop,
	aggr_m_promisc,
	aggr_m_multicst,
	NULL,
	NULL,
	NULL,
	aggr_m_ioctl,
	aggr_m_capab_get,
	NULL,
	NULL,
	aggr_m_setprop,
	NULL,
	aggr_m_propinfo
};
166 
167 /*ARGSUSED*/
168 static int
169 aggr_grp_constructor(void *buf, void *arg, int kmflag)
170 {
171 	aggr_grp_t *grp = buf;
172 
173 	bzero(grp, sizeof (*grp));
174 	mutex_init(&grp->lg_lacp_lock, NULL, MUTEX_DEFAULT, NULL);
175 	cv_init(&grp->lg_lacp_cv, NULL, CV_DEFAULT, NULL);
176 	rw_init(&grp->lg_tx_lock, NULL, RW_DRIVER, NULL);
177 	mutex_init(&grp->lg_port_lock, NULL, MUTEX_DEFAULT, NULL);
178 	cv_init(&grp->lg_port_cv, NULL, CV_DEFAULT, NULL);
179 	mutex_init(&grp->lg_tx_flowctl_lock, NULL, MUTEX_DEFAULT, NULL);
180 	cv_init(&grp->lg_tx_flowctl_cv, NULL, CV_DEFAULT, NULL);
181 	grp->lg_link_state = LINK_STATE_UNKNOWN;
182 	return (0);
183 }
184 
185 /*ARGSUSED*/
186 static void
187 aggr_grp_destructor(void *buf, void *arg)
188 {
189 	aggr_grp_t *grp = buf;
190 
191 	if (grp->lg_tx_ports != NULL) {
192 		kmem_free(grp->lg_tx_ports,
193 		    grp->lg_tx_ports_size * sizeof (aggr_port_t *));
194 	}
195 
196 	mutex_destroy(&grp->lg_lacp_lock);
197 	cv_destroy(&grp->lg_lacp_cv);
198 	mutex_destroy(&grp->lg_port_lock);
199 	cv_destroy(&grp->lg_port_cv);
200 	rw_destroy(&grp->lg_tx_lock);
201 	mutex_destroy(&grp->lg_tx_flowctl_lock);
202 	cv_destroy(&grp->lg_tx_flowctl_cv);
203 }
204 
205 void
206 aggr_grp_init(void)
207 {
208 	aggr_grp_cache = kmem_cache_create("aggr_grp_cache",
209 	    sizeof (aggr_grp_t), 0, aggr_grp_constructor,
210 	    aggr_grp_destructor, NULL, NULL, NULL, 0);
211 
212 	aggr_grp_hash = mod_hash_create_idhash("aggr_grp_hash",
213 	    GRP_HASHSZ, mod_hash_null_valdtor);
214 	rw_init(&aggr_grp_lock, NULL, RW_DEFAULT, NULL);
215 	aggr_grp_cnt = 0;
216 
217 	/*
218 	 * Allocate an id space to manage key values (when key is not
219 	 * specified). The range of the id space will be from
220 	 * (AGGR_MAX_KEY + 1) to UINT16_MAX, because the LACP protocol
221 	 * uses a 16-bit key.
222 	 */
223 	key_ids = id_space_create("aggr_key_ids", AGGR_MAX_KEY + 1, UINT16_MAX);
224 	ASSERT(key_ids != NULL);
225 }
226 
227 void
228 aggr_grp_fini(void)
229 {
230 	id_space_destroy(key_ids);
231 	rw_destroy(&aggr_grp_lock);
232 	mod_hash_destroy_idhash(aggr_grp_hash);
233 	kmem_cache_destroy(aggr_grp_cache);
234 }
235 
236 uint_t
237 aggr_grp_count(void)
238 {
239 	uint_t	count;
240 
241 	rw_enter(&aggr_grp_lock, RW_READER);
242 	count = aggr_grp_cnt;
243 	rw_exit(&aggr_grp_lock);
244 	return (count);
245 }
246 
247 /*
248  * Since both aggr_port_notify_cb() and aggr_port_timer_thread() functions
249  * requires the mac perimeter, this function holds a reference of the aggr
250  * and aggr won't call mac_unregister() until this reference drops to 0.
251  */
252 void
253 aggr_grp_port_hold(aggr_port_t *port)
254 {
255 	aggr_grp_t	*grp = port->lp_grp;
256 
257 	AGGR_PORT_REFHOLD(port);
258 	mutex_enter(&grp->lg_port_lock);
259 	grp->lg_port_ref++;
260 	mutex_exit(&grp->lg_port_lock);
261 }
262 
263 /*
264  * Release the reference of the grp and inform aggr_grp_delete() calling
265  * mac_unregister() is now safe.
266  */
267 void
268 aggr_grp_port_rele(aggr_port_t *port)
269 {
270 	aggr_grp_t	*grp = port->lp_grp;
271 
272 	mutex_enter(&grp->lg_port_lock);
273 	if (--grp->lg_port_ref == 0)
274 		cv_signal(&grp->lg_port_cv);
275 	mutex_exit(&grp->lg_port_lock);
276 	AGGR_PORT_REFRELE(port);
277 }
278 
279 /*
280  * Wait for the port's lacp timer thread and the port's notification callback
281  * to exit.
282  */
283 void
284 aggr_grp_port_wait(aggr_grp_t *grp)
285 {
286 	mutex_enter(&grp->lg_port_lock);
287 	if (grp->lg_port_ref != 0)
288 		cv_wait(&grp->lg_port_cv, &grp->lg_port_lock);
289 	mutex_exit(&grp->lg_port_lock);
290 }
291 
292 /*
293  * Attach a port to a link aggregation group.
294  *
295  * A port is attached to a link aggregation group once its speed
296  * and link state have been verified.
297  *
298  * Returns B_TRUE if the group link state or speed has changed. If
299  * it's the case, the caller must notify the MAC layer via a call
300  * to mac_link().
301  */
302 boolean_t
303 aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port)
304 {
305 	boolean_t link_state_changed = B_FALSE;
306 
307 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
308 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
309 
310 	if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
311 		return (B_FALSE);
312 
313 	/*
314 	 * Validate the MAC port link speed and update the group
315 	 * link speed if needed.
316 	 */
317 	if (port->lp_ifspeed == 0 ||
318 	    port->lp_link_state != LINK_STATE_UP ||
319 	    port->lp_link_duplex != LINK_DUPLEX_FULL) {
320 		/*
321 		 * Can't attach a MAC port with unknown link speed,
322 		 * down link, or not in full duplex mode.
323 		 */
324 		return (B_FALSE);
325 	}
326 
327 	if (grp->lg_ifspeed == 0) {
328 		/*
329 		 * The group inherits the speed of the first link being
330 		 * attached.
331 		 */
332 		grp->lg_ifspeed = port->lp_ifspeed;
333 		link_state_changed = B_TRUE;
334 	} else if (grp->lg_ifspeed != port->lp_ifspeed) {
335 		/*
336 		 * The link speed of the MAC port must be the same as
337 		 * the group link speed, as per 802.3ad. Since it is
338 		 * not, the attach is cancelled.
339 		 */
340 		return (B_FALSE);
341 	}
342 
343 	grp->lg_nattached_ports++;
344 
345 	/*
346 	 * Update the group link state.
347 	 */
348 	if (grp->lg_link_state != LINK_STATE_UP) {
349 		grp->lg_link_state = LINK_STATE_UP;
350 		grp->lg_link_duplex = LINK_DUPLEX_FULL;
351 		link_state_changed = B_TRUE;
352 	}
353 
354 	/*
355 	 * Update port's state.
356 	 */
357 	port->lp_state = AGGR_PORT_STATE_ATTACHED;
358 
359 	aggr_grp_multicst_port(port, B_TRUE);
360 
361 	/*
362 	 * Set port's receive callback
363 	 */
364 	mac_rx_set(port->lp_mch, aggr_recv_cb, port);
365 
366 	/*
367 	 * If LACP is OFF, the port can be used to send data as soon
368 	 * as its link is up and verified to be compatible with the
369 	 * aggregation.
370 	 *
371 	 * If LACP is active or passive, notify the LACP subsystem, which
372 	 * will enable sending on the port following the LACP protocol.
373 	 */
374 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
375 		aggr_send_port_enable(port);
376 	else
377 		aggr_lacp_port_attached(port);
378 
379 	return (link_state_changed);
380 }
381 
382 boolean_t
383 aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port)
384 {
385 	boolean_t link_state_changed = B_FALSE;
386 
387 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
388 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
389 
390 	/* update state */
391 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
392 		return (B_FALSE);
393 
394 	mac_rx_clear(port->lp_mch);
395 
396 	aggr_grp_multicst_port(port, B_FALSE);
397 
398 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
399 		aggr_send_port_disable(port);
400 	else
401 		aggr_lacp_port_detached(port);
402 
403 	port->lp_state = AGGR_PORT_STATE_STANDBY;
404 
405 	grp->lg_nattached_ports--;
406 	if (grp->lg_nattached_ports == 0) {
407 		/* the last attached MAC port of the group is being detached */
408 		grp->lg_ifspeed = 0;
409 		grp->lg_link_state = LINK_STATE_DOWN;
410 		grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
411 		link_state_changed = B_TRUE;
412 	}
413 
414 	return (link_state_changed);
415 }
416 
417 /*
418  * Update the MAC addresses of the constituent ports of the specified
419  * group. This function is invoked:
420  * - after creating a new aggregation group.
421  * - after adding new ports to an aggregation group.
422  * - after removing a port from a group when the MAC address of
423  *   that port was used for the MAC address of the group.
424  * - after the MAC address of a port changed when the MAC address
425  *   of that port was used for the MAC address of the group.
426  *
427  * Return true if the link state of the aggregation changed, for example
428  * as a result of a failure changing the MAC address of one of the
429  * constituent ports.
430  */
431 boolean_t
432 aggr_grp_update_ports_mac(aggr_grp_t *grp)
433 {
434 	aggr_port_t *cport;
435 	boolean_t link_state_changed = B_FALSE;
436 	mac_perim_handle_t mph;
437 
438 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
439 
440 	for (cport = grp->lg_ports; cport != NULL;
441 	    cport = cport->lp_next) {
442 		mac_perim_enter_by_mh(cport->lp_mh, &mph);
443 		if (aggr_port_unicst(cport) != 0) {
444 			if (aggr_grp_detach_port(grp, cport))
445 				link_state_changed = B_TRUE;
446 		} else {
447 			/*
448 			 * If a port was detached because of a previous
449 			 * failure changing the MAC address, the port is
450 			 * reattached when it successfully changes the MAC
451 			 * address now, and this might cause the link state
452 			 * of the aggregation to change.
453 			 */
454 			if (aggr_grp_attach_port(grp, cport))
455 				link_state_changed = B_TRUE;
456 		}
457 		mac_perim_exit(mph);
458 	}
459 	return (link_state_changed);
460 }
461 
462 /*
463  * Invoked when the MAC address of a port has changed. If the port's
464  * MAC address was used for the group MAC address, set mac_addr_changedp
465  * to B_TRUE to indicate to the caller that it should send a MAC_NOTE_UNICST
466  * notification. If the link state changes due to detach/attach of
467  * the constituent port, set link_state_changedp to B_TRUE to indicate
468  * to the caller that it should send a MAC_NOTE_LINK notification. In both
469  * cases, it is the responsibility of the caller to invoke notification
470  * functions after releasing the the port lock.
471  */
472 void
473 aggr_grp_port_mac_changed(aggr_grp_t *grp, aggr_port_t *port,
474     boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
475 {
476 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
477 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
478 	ASSERT(mac_addr_changedp != NULL);
479 	ASSERT(link_state_changedp != NULL);
480 
481 	*mac_addr_changedp = B_FALSE;
482 	*link_state_changedp = B_FALSE;
483 
484 	if (grp->lg_addr_fixed) {
485 		/*
486 		 * The group is using a fixed MAC address or an automatic
487 		 * MAC address has not been set.
488 		 */
489 		return;
490 	}
491 
492 	if (grp->lg_mac_addr_port == port) {
493 		/*
494 		 * The MAC address of the port was assigned to the group
495 		 * MAC address. Update the group MAC address.
496 		 */
497 		bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
498 		*mac_addr_changedp = B_TRUE;
499 	} else {
500 		/*
501 		 * Update the actual port MAC address to the MAC address
502 		 * of the group.
503 		 */
504 		if (aggr_port_unicst(port) != 0) {
505 			*link_state_changedp = aggr_grp_detach_port(grp, port);
506 		} else {
507 			/*
508 			 * If a port was detached because of a previous
509 			 * failure changing the MAC address, the port is
510 			 * reattached when it successfully changes the MAC
511 			 * address now, and this might cause the link state
512 			 * of the aggregation to change.
513 			 */
514 			*link_state_changedp = aggr_grp_attach_port(grp, port);
515 		}
516 	}
517 }
518 
519 /*
520  * Add a port to a link aggregation group.
521  */
522 static int
523 aggr_grp_add_port(aggr_grp_t *grp, datalink_id_t port_linkid, boolean_t force,
524     aggr_port_t **pp)
525 {
526 	aggr_port_t *port, **cport;
527 	mac_perim_handle_t mph;
528 	zoneid_t port_zoneid = ALL_ZONES;
529 	int err;
530 
531 	/* The port must be int the same zone as the aggregation. */
532 	if (zone_check_datalink(&port_zoneid, port_linkid) != 0)
533 		port_zoneid = GLOBAL_ZONEID;
534 	if (grp->lg_zoneid != port_zoneid)
535 		return (EBUSY);
536 
537 	/*
538 	 * lg_mh could be NULL when the function is called during the creation
539 	 * of the aggregation.
540 	 */
541 	ASSERT(grp->lg_mh == NULL || MAC_PERIM_HELD(grp->lg_mh));
542 
543 	/* create new port */
544 	err = aggr_port_create(grp, port_linkid, force, &port);
545 	if (err != 0)
546 		return (err);
547 
548 	mac_perim_enter_by_mh(port->lp_mh, &mph);
549 
550 	/* add port to list of group constituent ports */
551 	cport = &grp->lg_ports;
552 	while (*cport != NULL)
553 		cport = &((*cport)->lp_next);
554 	*cport = port;
555 
556 	/*
557 	 * Back reference to the group it is member of. A port always
558 	 * holds a reference to its group to ensure that the back
559 	 * reference is always valid.
560 	 */
561 	port->lp_grp = grp;
562 	AGGR_GRP_REFHOLD(grp);
563 	grp->lg_nports++;
564 
565 	aggr_lacp_init_port(port);
566 	mac_perim_exit(mph);
567 
568 	if (pp != NULL)
569 		*pp = port;
570 
571 	return (0);
572 }
573 
574 /*
575  * This is called in response to either our LACP state machine or a MAC
576  * notification that the link has gone down via aggr_send_port_disable(). At
577  * this point, we may need to update our default ring. To that end, we go
578  * through the set of ports (underlying datalinks in an aggregation) that are
579  * currently enabled to transmit data. If all our links have been disabled for
580  * transmit, then we don't do anything.
581  *
582  * Note, because we only have a single TX group, we don't have to worry about
583  * the rings moving between groups and the chance that mac will reassign it
584  * unless someone removes a port, at which point, we play it safe and call this
585  * again.
586  */
587 void
588 aggr_grp_update_default(aggr_grp_t *grp)
589 {
590 	aggr_port_t *port;
591 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
592 
593 	rw_enter(&grp->lg_tx_lock, RW_WRITER);
594 
595 	if (grp->lg_ntx_ports == 0) {
596 		rw_exit(&grp->lg_tx_lock);
597 		return;
598 	}
599 
600 	port = grp->lg_tx_ports[0];
601 	ASSERT(port->lp_tx_ring_cnt > 0);
602 	mac_hwring_set_default(grp->lg_mh, port->lp_pseudo_tx_rings[0]);
603 	rw_exit(&grp->lg_tx_lock);
604 }
605 
606 /*
607  * Add a pseudo RX ring for the given HW ring handle.
608  */
609 static int
610 aggr_add_pseudo_rx_ring(aggr_port_t *port,
611     aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh)
612 {
613 	aggr_pseudo_rx_ring_t	*ring;
614 	int			err;
615 	int			j;
616 
617 	for (j = 0; j < MAX_RINGS_PER_GROUP; j++) {
618 		ring = rx_grp->arg_rings + j;
619 		if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE))
620 			break;
621 	}
622 
623 	/*
624 	 * No slot for this new RX ring.
625 	 */
626 	if (j == MAX_RINGS_PER_GROUP)
627 		return (EIO);
628 
629 	ring->arr_flags |= MAC_PSEUDO_RING_INUSE;
630 	ring->arr_hw_rh = hw_rh;
631 	ring->arr_port = port;
632 	rx_grp->arg_ring_cnt++;
633 
634 	/*
635 	 * The group is already registered, dynamically add a new ring to the
636 	 * mac group.
637 	 */
638 	if ((err = mac_group_add_ring(rx_grp->arg_gh, j)) != 0) {
639 		ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE;
640 		ring->arr_hw_rh = NULL;
641 		ring->arr_port = NULL;
642 		rx_grp->arg_ring_cnt--;
643 	} else {
644 		mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring,
645 		    mac_find_ring(rx_grp->arg_gh, j));
646 	}
647 	return (err);
648 }
649 
650 /*
651  * Remove the pseudo RX ring of the given HW ring handle.
652  */
653 static void
654 aggr_rem_pseudo_rx_ring(aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh)
655 {
656 	aggr_pseudo_rx_ring_t	*ring;
657 	int			j;
658 
659 	for (j = 0; j < MAX_RINGS_PER_GROUP; j++) {
660 		ring = rx_grp->arg_rings + j;
661 		if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE) ||
662 		    ring->arr_hw_rh != hw_rh) {
663 			continue;
664 		}
665 
666 		mac_group_rem_ring(rx_grp->arg_gh, ring->arr_rh);
667 
668 		ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE;
669 		ring->arr_hw_rh = NULL;
670 		ring->arr_port = NULL;
671 		rx_grp->arg_ring_cnt--;
672 		mac_hwring_teardown(hw_rh);
673 		break;
674 	}
675 }
676 
677 /*
678  * This function is called to create pseudo rings over the hardware rings of
679  * the underlying device. Note that there is a 1:1 mapping between the pseudo
680  * RX rings of the aggr and the hardware rings of the underlying port.
681  */
682 static int
683 aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
684 {
685 	aggr_grp_t		*grp = port->lp_grp;
686 	mac_ring_handle_t	hw_rh[MAX_RINGS_PER_GROUP];
687 	aggr_unicst_addr_t	*addr, *a;
688 	mac_perim_handle_t	pmph;
689 	int			hw_rh_cnt, i = 0, j;
690 	int			err = 0;
691 
692 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
693 	mac_perim_enter_by_mh(port->lp_mh, &pmph);
694 
695 	/*
696 	 * This function must be called after the aggr registers its mac
697 	 * and its RX group has been initialized.
698 	 */
699 	ASSERT(rx_grp->arg_gh != NULL);
700 
701 	/*
702 	 * Get the list the the underlying HW rings.
703 	 */
704 	hw_rh_cnt = mac_hwrings_get(port->lp_mch,
705 	    &port->lp_hwgh, hw_rh, MAC_RING_TYPE_RX);
706 
707 	if (port->lp_hwgh != NULL) {
708 		/*
709 		 * Quiesce the HW ring and the mac srs on the ring. Note
710 		 * that the HW ring will be restarted when the pseudo ring
711 		 * is started. At that time all the packets will be
712 		 * directly passed up to the pseudo RX ring and handled
713 		 * by mac srs created over the pseudo RX ring.
714 		 */
715 		mac_rx_client_quiesce(port->lp_mch);
716 		mac_srs_perm_quiesce(port->lp_mch, B_TRUE);
717 	}
718 
719 	/*
720 	 * Add all the unicast addresses to the newly added port.
721 	 */
722 	for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) {
723 		if ((err = aggr_port_addmac(port, addr->aua_addr)) != 0)
724 			break;
725 	}
726 
727 	for (i = 0; err == 0 && i < hw_rh_cnt; i++)
728 		err = aggr_add_pseudo_rx_ring(port, rx_grp, hw_rh[i]);
729 
730 	if (err != 0) {
731 		for (j = 0; j < i; j++)
732 			aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[j]);
733 
734 		for (a = rx_grp->arg_macaddr; a != addr; a = a->aua_next)
735 			aggr_port_remmac(port, a->aua_addr);
736 
737 		if (port->lp_hwgh != NULL) {
738 			mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
739 			mac_rx_client_restart(port->lp_mch);
740 			port->lp_hwgh = NULL;
741 		}
742 	} else {
743 		port->lp_rx_grp_added = B_TRUE;
744 	}
745 done:
746 	mac_perim_exit(pmph);
747 	return (err);
748 }
749 
750 /*
751  * This function is called by aggr to remove pseudo RX rings over the
752  * HW rings of the underlying port.
753  */
754 static void
755 aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
756 {
757 	aggr_grp_t		*grp = port->lp_grp;
758 	mac_ring_handle_t	hw_rh[MAX_RINGS_PER_GROUP];
759 	aggr_unicst_addr_t	*addr;
760 	mac_group_handle_t	hwgh;
761 	mac_perim_handle_t	pmph;
762 	int			hw_rh_cnt, i;
763 
764 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
765 	mac_perim_enter_by_mh(port->lp_mh, &pmph);
766 
767 	if (!port->lp_rx_grp_added)
768 		goto done;
769 
770 	ASSERT(rx_grp->arg_gh != NULL);
771 	hw_rh_cnt = mac_hwrings_get(port->lp_mch,
772 	    &hwgh, hw_rh, MAC_RING_TYPE_RX);
773 
774 	/*
775 	 * If hw_rh_cnt is 0, it means that the underlying port does not
776 	 * support RX rings. Directly return in this case.
777 	 */
778 	for (i = 0; i < hw_rh_cnt; i++)
779 		aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[i]);
780 
781 	for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next)
782 		aggr_port_remmac(port, addr->aua_addr);
783 
784 	if (port->lp_hwgh != NULL) {
785 		port->lp_hwgh = NULL;
786 
787 		/*
788 		 * First clear the permanent-quiesced flag of the RX srs then
789 		 * restart the HW ring and the mac srs on the ring. Note that
790 		 * the HW ring and associated SRS will soon been removed when
791 		 * the port is removed from the aggr.
792 		 */
793 		mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
794 		mac_rx_client_restart(port->lp_mch);
795 	}
796 
797 	port->lp_rx_grp_added = B_FALSE;
798 done:
799 	mac_perim_exit(pmph);
800 }
801 
802 /*
803  * Add a pseudo TX ring for the given HW ring handle.
804  */
805 static int
806 aggr_add_pseudo_tx_ring(aggr_port_t *port,
807     aggr_pseudo_tx_group_t *tx_grp, mac_ring_handle_t hw_rh,
808     mac_ring_handle_t *pseudo_rh)
809 {
810 	aggr_pseudo_tx_ring_t	*ring;
811 	int			err;
812 	int			i;
813 
814 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
815 	for (i = 0; i < MAX_RINGS_PER_GROUP; i++) {
816 		ring = tx_grp->atg_rings + i;
817 		if (!(ring->atr_flags & MAC_PSEUDO_RING_INUSE))
818 			break;
819 	}
820 	/*
821 	 * No slot for this new TX ring.
822 	 */
823 	if (i == MAX_RINGS_PER_GROUP)
824 		return (EIO);
825 	/*
826 	 * The following 4 statements needs to be done before
827 	 * calling mac_group_add_ring(). Otherwise it will
828 	 * result in an assertion failure in mac_init_ring().
829 	 */
830 	ring->atr_flags |= MAC_PSEUDO_RING_INUSE;
831 	ring->atr_hw_rh = hw_rh;
832 	ring->atr_port = port;
833 	tx_grp->atg_ring_cnt++;
834 
835 	/*
836 	 * The TX side has no concept of ring groups unlike RX groups.
837 	 * There is just a single group which stores all the TX rings.
838 	 * This group will be used to store aggr's pseudo TX rings.
839 	 */
840 	if ((err = mac_group_add_ring(tx_grp->atg_gh, i)) != 0) {
841 		ring->atr_flags &= ~MAC_PSEUDO_RING_INUSE;
842 		ring->atr_hw_rh = NULL;
843 		ring->atr_port = NULL;
844 		tx_grp->atg_ring_cnt--;
845 	} else {
846 		*pseudo_rh = mac_find_ring(tx_grp->atg_gh, i);
847 		if (hw_rh != NULL) {
848 			mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring,
849 			    mac_find_ring(tx_grp->atg_gh, i));
850 		}
851 	}
852 
853 	return (err);
854 }
855 
856 /*
857  * Remove the pseudo TX ring of the given HW ring handle.
858  */
859 static void
860 aggr_rem_pseudo_tx_ring(aggr_pseudo_tx_group_t *tx_grp,
861     mac_ring_handle_t pseudo_hw_rh)
862 {
863 	aggr_pseudo_tx_ring_t	*ring;
864 	int			i;
865 
866 	for (i = 0; i < MAX_RINGS_PER_GROUP; i++) {
867 		ring = tx_grp->atg_rings + i;
868 		if (ring->atr_rh != pseudo_hw_rh)
869 			continue;
870 
871 		ASSERT(ring->atr_flags & MAC_PSEUDO_RING_INUSE);
872 		mac_group_rem_ring(tx_grp->atg_gh, pseudo_hw_rh);
873 		ring->atr_flags &= ~MAC_PSEUDO_RING_INUSE;
874 		mac_hwring_teardown(ring->atr_hw_rh);
875 		ring->atr_hw_rh = NULL;
876 		ring->atr_port = NULL;
877 		tx_grp->atg_ring_cnt--;
878 		break;
879 	}
880 }
881 
882 /*
883  * This function is called to create pseudo rings over hardware rings of
884  * the underlying device. There is a 1:1 mapping between the pseudo TX
885  * rings of the aggr and the hardware rings of the underlying port.
886  */
887 static int
888 aggr_add_pseudo_tx_group(aggr_port_t *port, aggr_pseudo_tx_group_t *tx_grp)
889 {
890 	aggr_grp_t		*grp = port->lp_grp;
891 	mac_ring_handle_t	hw_rh[MAX_RINGS_PER_GROUP], pseudo_rh;
892 	mac_perim_handle_t	pmph;
893 	int			hw_rh_cnt, i = 0, j;
894 	int			err = 0;
895 
896 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
897 	mac_perim_enter_by_mh(port->lp_mh, &pmph);
898 
899 	/*
900 	 * Get the list the the underlying HW rings.
901 	 */
902 	hw_rh_cnt = mac_hwrings_get(port->lp_mch,
903 	    NULL, hw_rh, MAC_RING_TYPE_TX);
904 
905 	/*
906 	 * Even if the underlying NIC does not have TX rings, we
907 	 * still make a psuedo TX ring for that NIC with NULL as
908 	 * the ring handle.
909 	 */
910 	if (hw_rh_cnt == 0)
911 		port->lp_tx_ring_cnt = 1;
912 	else
913 		port->lp_tx_ring_cnt = hw_rh_cnt;
914 
915 	port->lp_tx_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) *
916 	    port->lp_tx_ring_cnt), KM_SLEEP);
917 	port->lp_pseudo_tx_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) *
918 	    port->lp_tx_ring_cnt), KM_SLEEP);
919 
920 	if (hw_rh_cnt == 0) {
921 		if ((err = aggr_add_pseudo_tx_ring(port, tx_grp,
922 		    NULL, &pseudo_rh)) == 0) {
923 			port->lp_tx_rings[0] = NULL;
924 			port->lp_pseudo_tx_rings[0] = pseudo_rh;
925 		}
926 	} else {
927 		for (i = 0; err == 0 && i < hw_rh_cnt; i++) {
928 			err = aggr_add_pseudo_tx_ring(port,
929 			    tx_grp, hw_rh[i], &pseudo_rh);
930 			if (err != 0)
931 				break;
932 			port->lp_tx_rings[i] = hw_rh[i];
933 			port->lp_pseudo_tx_rings[i] = pseudo_rh;
934 		}
935 	}
936 
937 	if (err != 0) {
938 		if (hw_rh_cnt != 0) {
939 			for (j = 0; j < i; j++) {
940 				aggr_rem_pseudo_tx_ring(tx_grp,
941 				    port->lp_pseudo_tx_rings[j]);
942 			}
943 		}
944 		kmem_free(port->lp_tx_rings,
945 		    (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
946 		kmem_free(port->lp_pseudo_tx_rings,
947 		    (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
948 		port->lp_tx_ring_cnt = 0;
949 	} else {
950 		port->lp_tx_grp_added = B_TRUE;
951 		port->lp_tx_notify_mh = mac_client_tx_notify(port->lp_mch,
952 		    aggr_tx_ring_update, port);
953 	}
954 	mac_perim_exit(pmph);
955 	aggr_grp_update_default(grp);
956 	return (err);
957 }
958 
959 /*
960  * This function is called by aggr to remove pseudo TX rings over the
961  * HW rings of the underlying port.
962  */
963 static void
964 aggr_rem_pseudo_tx_group(aggr_port_t *port, aggr_pseudo_tx_group_t *tx_grp)
965 {
966 	aggr_grp_t		*grp = port->lp_grp;
967 	mac_perim_handle_t	pmph;
968 	int			i;
969 
970 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
971 	mac_perim_enter_by_mh(port->lp_mh, &pmph);
972 
973 	if (!port->lp_tx_grp_added)
974 		goto done;
975 
976 	ASSERT(tx_grp->atg_gh != NULL);
977 
978 	for (i = 0; i < port->lp_tx_ring_cnt; i++)
979 		aggr_rem_pseudo_tx_ring(tx_grp, port->lp_pseudo_tx_rings[i]);
980 
981 	kmem_free(port->lp_tx_rings,
982 	    (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
983 	kmem_free(port->lp_pseudo_tx_rings,
984 	    (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt));
985 
986 	port->lp_tx_ring_cnt = 0;
987 	(void) mac_client_tx_notify(port->lp_mch, NULL, port->lp_tx_notify_mh);
988 	port->lp_tx_grp_added = B_FALSE;
989 	aggr_grp_update_default(grp);
990 done:
991 	mac_perim_exit(pmph);
992 }
993 
994 static int
995 aggr_pseudo_disable_intr(mac_intr_handle_t ih)
996 {
997 	aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih;
998 	return (mac_hwring_disable_intr(rr_ring->arr_hw_rh));
999 }
1000 
1001 static int
1002 aggr_pseudo_enable_intr(mac_intr_handle_t ih)
1003 {
1004 	aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih;
1005 	return (mac_hwring_enable_intr(rr_ring->arr_hw_rh));
1006 }
1007 
1008 /*
1009  * Here we need to start the pseudo-ring. As MAC already ensures that the
1010  * underlying device is set up, all we need to do is save the ring generation.
1011  *
1012  * Note, we don't end up wanting to use the underlying mac_hwring_start/stop
1013  * functions here as those don't actually stop and start the ring, they just
1014  * quiesce the ring. Regardless of whether the aggr is logically up or not, we
1015  * want to make sure that we can receive traffic for LACP.
1016  */
1017 static int
1018 aggr_pseudo_start_ring(mac_ring_driver_t arg, uint64_t mr_gen)
1019 {
1020 	aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)arg;
1021 
1022 	rr_ring->arr_gen = mr_gen;
1023 	return (0);
1024 }
1025 
1026 /*
1027  * Add one or more ports to an existing link aggregation group.
1028  */
1029 int
1030 aggr_grp_add_ports(datalink_id_t linkid, uint_t nports, boolean_t force,
1031     laioc_port_t *ports)
1032 {
1033 	int rc, i, nadded = 0;
1034 	aggr_grp_t *grp = NULL;
1035 	aggr_port_t *port;
1036 	boolean_t link_state_changed = B_FALSE;
1037 	mac_perim_handle_t mph, pmph;
1038 
1039 	/* get group corresponding to linkid */
1040 	rw_enter(&aggr_grp_lock, RW_READER);
1041 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1042 	    (mod_hash_val_t *)&grp) != 0) {
1043 		rw_exit(&aggr_grp_lock);
1044 		return (ENOENT);
1045 	}
1046 	AGGR_GRP_REFHOLD(grp);
1047 
1048 	/*
1049 	 * Hold the perimeter so that the aggregation won't be destroyed.
1050 	 */
1051 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1052 	rw_exit(&aggr_grp_lock);
1053 
1054 	/* add the specified ports to group */
1055 	for (i = 0; i < nports; i++) {
1056 		/* add port to group */
1057 		if ((rc = aggr_grp_add_port(grp, ports[i].lp_linkid,
1058 		    force, &port)) != 0) {
1059 			goto bail;
1060 		}
1061 		ASSERT(port != NULL);
1062 		nadded++;
1063 
1064 		/* check capabilities */
1065 		if (!aggr_grp_capab_check(grp, port) ||
1066 		    !aggr_grp_sdu_check(grp, port) ||
1067 		    !aggr_grp_margin_check(grp, port)) {
1068 			rc = ENOTSUP;
1069 			goto bail;
1070 		}
1071 
1072 		/*
1073 		 * Create the pseudo ring for each HW ring of the underlying
1074 		 * port.
1075 		 */
1076 		rc = aggr_add_pseudo_tx_group(port, &grp->lg_tx_group);
1077 		if (rc != 0)
1078 			goto bail;
1079 		rc = aggr_add_pseudo_rx_group(port, &grp->lg_rx_group);
1080 		if (rc != 0)
1081 			goto bail;
1082 
1083 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
1084 
1085 		/* set LACP mode */
1086 		aggr_port_lacp_set_mode(grp, port);
1087 
1088 		/* start port if group has already been started */
1089 		if (grp->lg_started) {
1090 			rc = aggr_port_start(port);
1091 			if (rc != 0) {
1092 				mac_perim_exit(pmph);
1093 				goto bail;
1094 			}
1095 
1096 			/*
1097 			 * Turn on the promiscuous mode over the port when it
1098 			 * is requested to be turned on to receive the
1099 			 * non-primary address over a port, or the promiscous
1100 			 * mode is enabled over the aggr.
1101 			 */
1102 			if (grp->lg_promisc || port->lp_prom_addr != NULL) {
1103 				rc = aggr_port_promisc(port, B_TRUE);
1104 				if (rc != 0) {
1105 					mac_perim_exit(pmph);
1106 					goto bail;
1107 				}
1108 			}
1109 		}
1110 		mac_perim_exit(pmph);
1111 
1112 		/*
1113 		 * Attach each port if necessary.
1114 		 */
1115 		if (aggr_port_notify_link(grp, port))
1116 			link_state_changed = B_TRUE;
1117 
1118 		/*
1119 		 * Initialize the callback functions for this port.
1120 		 */
1121 		aggr_port_init_callbacks(port);
1122 	}
1123 
1124 	/* update the MAC address of the constituent ports */
1125 	if (aggr_grp_update_ports_mac(grp))
1126 		link_state_changed = B_TRUE;
1127 
1128 	if (link_state_changed)
1129 		mac_link_update(grp->lg_mh, grp->lg_link_state);
1130 
1131 bail:
1132 	if (rc != 0) {
1133 		/* stop and remove ports that have been added */
1134 		for (i = 0; i < nadded; i++) {
1135 			port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
1136 			ASSERT(port != NULL);
1137 			if (grp->lg_started) {
1138 				mac_perim_enter_by_mh(port->lp_mh, &pmph);
1139 				(void) aggr_port_promisc(port, B_FALSE);
1140 				aggr_port_stop(port);
1141 				mac_perim_exit(pmph);
1142 			}
1143 			aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group);
1144 			aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
1145 			(void) aggr_grp_rem_port(grp, port, NULL, NULL);
1146 		}
1147 	}
1148 
1149 	mac_perim_exit(mph);
1150 	AGGR_GRP_REFRELE(grp);
1151 	return (rc);
1152 }
1153 
1154 static int
1155 aggr_grp_modify_common(aggr_grp_t *grp, uint8_t update_mask, uint32_t policy,
1156     boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode,
1157     aggr_lacp_timer_t lacp_timer)
1158 {
1159 	boolean_t mac_addr_changed = B_FALSE;
1160 	boolean_t link_state_changed = B_FALSE;
1161 	mac_perim_handle_t pmph;
1162 
1163 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1164 
1165 	/* validate fixed address if specified */
1166 	if ((update_mask & AGGR_MODIFY_MAC) && mac_fixed &&
1167 	    ((bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) ||
1168 	    (mac_addr[0] & 0x01))) {
1169 		return (EINVAL);
1170 	}
1171 
1172 	/* update policy if requested */
1173 	if (update_mask & AGGR_MODIFY_POLICY)
1174 		aggr_send_update_policy(grp, policy);
1175 
1176 	/* update unicast MAC address if requested */
1177 	if (update_mask & AGGR_MODIFY_MAC) {
1178 		if (mac_fixed) {
1179 			/* user-supplied MAC address */
1180 			grp->lg_mac_addr_port = NULL;
1181 			if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) != 0) {
1182 				bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
1183 				mac_addr_changed = B_TRUE;
1184 			}
1185 		} else if (grp->lg_addr_fixed) {
1186 			/* switch from user-supplied to automatic */
1187 			aggr_port_t *port = grp->lg_ports;
1188 
1189 			mac_perim_enter_by_mh(port->lp_mh, &pmph);
1190 			bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
1191 			grp->lg_mac_addr_port = port;
1192 			mac_addr_changed = B_TRUE;
1193 			mac_perim_exit(pmph);
1194 		}
1195 		grp->lg_addr_fixed = mac_fixed;
1196 	}
1197 
1198 	if (mac_addr_changed)
1199 		link_state_changed = aggr_grp_update_ports_mac(grp);
1200 
1201 	if (update_mask & AGGR_MODIFY_LACP_MODE)
1202 		aggr_lacp_update_mode(grp, lacp_mode);
1203 
1204 	if (update_mask & AGGR_MODIFY_LACP_TIMER)
1205 		aggr_lacp_update_timer(grp, lacp_timer);
1206 
1207 	if (link_state_changed)
1208 		mac_link_update(grp->lg_mh, grp->lg_link_state);
1209 
1210 	if (mac_addr_changed)
1211 		mac_unicst_update(grp->lg_mh, grp->lg_addr);
1212 
1213 	return (0);
1214 }
1215 
1216 /*
1217  * Update properties of an existing link aggregation group.
1218  */
1219 int
1220 aggr_grp_modify(datalink_id_t linkid, uint8_t update_mask, uint32_t policy,
1221     boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode,
1222     aggr_lacp_timer_t lacp_timer)
1223 {
1224 	aggr_grp_t *grp = NULL;
1225 	mac_perim_handle_t mph;
1226 	int err;
1227 
1228 	/* get group corresponding to linkid */
1229 	rw_enter(&aggr_grp_lock, RW_READER);
1230 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1231 	    (mod_hash_val_t *)&grp) != 0) {
1232 		rw_exit(&aggr_grp_lock);
1233 		return (ENOENT);
1234 	}
1235 	AGGR_GRP_REFHOLD(grp);
1236 
1237 	/*
1238 	 * Hold the perimeter so that the aggregation won't be destroyed.
1239 	 */
1240 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1241 	rw_exit(&aggr_grp_lock);
1242 
1243 	err = aggr_grp_modify_common(grp, update_mask, policy, mac_fixed,
1244 	    mac_addr, lacp_mode, lacp_timer);
1245 
1246 	mac_perim_exit(mph);
1247 	AGGR_GRP_REFRELE(grp);
1248 	return (err);
1249 }
1250 
1251 /*
1252  * Create a new link aggregation group upon request from administrator.
1253  * Returns 0 on success, an errno on failure.
1254  */
1255 int
1256 aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports,
1257     laioc_port_t *ports, uint32_t policy, boolean_t mac_fixed, boolean_t force,
1258     uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer,
1259     cred_t *credp)
1260 {
1261 	aggr_grp_t *grp = NULL;
1262 	aggr_port_t *port;
1263 	mac_register_t *mac;
1264 	boolean_t link_state_changed;
1265 	mac_perim_handle_t mph;
1266 	int err;
1267 	int i;
1268 	kt_did_t tid = 0;
1269 
1270 	/* need at least one port */
1271 	if (nports == 0)
1272 		return (EINVAL);
1273 
1274 	rw_enter(&aggr_grp_lock, RW_WRITER);
1275 
1276 	/* does a group with the same linkid already exist? */
1277 	err = mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1278 	    (mod_hash_val_t *)&grp);
1279 	if (err == 0) {
1280 		rw_exit(&aggr_grp_lock);
1281 		return (EEXIST);
1282 	}
1283 
1284 	grp = kmem_cache_alloc(aggr_grp_cache, KM_SLEEP);
1285 
1286 	grp->lg_refs = 1;
1287 	grp->lg_closing = B_FALSE;
1288 	grp->lg_force = force;
1289 	grp->lg_linkid = linkid;
1290 	grp->lg_zoneid = crgetzoneid(credp);
1291 	grp->lg_ifspeed = 0;
1292 	grp->lg_link_state = LINK_STATE_UNKNOWN;
1293 	grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
1294 	grp->lg_started = B_FALSE;
1295 	grp->lg_promisc = B_FALSE;
1296 	grp->lg_lacp_done = B_FALSE;
1297 	grp->lg_tx_notify_done = B_FALSE;
1298 	grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
1299 	grp->lg_lacp_rx_thread = thread_create(NULL, 0,
1300 	    aggr_lacp_rx_thread, grp, 0, &p0, TS_RUN, minclsyspri);
1301 	grp->lg_tx_notify_thread = thread_create(NULL, 0,
1302 	    aggr_tx_notify_thread, grp, 0, &p0, TS_RUN, minclsyspri);
1303 	grp->lg_tx_blocked_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) *
1304 	    MAX_RINGS_PER_GROUP), KM_SLEEP);
1305 	grp->lg_tx_blocked_cnt = 0;
1306 	bzero(&grp->lg_rx_group, sizeof (aggr_pseudo_rx_group_t));
1307 	bzero(&grp->lg_tx_group, sizeof (aggr_pseudo_tx_group_t));
1308 	aggr_lacp_init_grp(grp);
1309 
1310 	/* add MAC ports to group */
1311 	grp->lg_ports = NULL;
1312 	grp->lg_nports = 0;
1313 	grp->lg_nattached_ports = 0;
1314 	grp->lg_ntx_ports = 0;
1315 
1316 	/*
1317 	 * If key is not specified by the user, allocate the key.
1318 	 */
1319 	if ((key == 0) && ((key = (uint32_t)id_alloc(key_ids)) == 0)) {
1320 		err = ENOMEM;
1321 		goto bail;
1322 	}
1323 	grp->lg_key = key;
1324 
1325 	for (i = 0; i < nports; i++) {
1326 		err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, NULL);
1327 		if (err != 0)
1328 			goto bail;
1329 	}
1330 
1331 	/*
1332 	 * If no explicit MAC address was specified by the administrator,
1333 	 * set it to the MAC address of the first port.
1334 	 */
1335 	grp->lg_addr_fixed = mac_fixed;
1336 	if (grp->lg_addr_fixed) {
1337 		/* validate specified address */
1338 		if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) {
1339 			err = EINVAL;
1340 			goto bail;
1341 		}
1342 		bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
1343 	} else {
1344 		bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
1345 		grp->lg_mac_addr_port = grp->lg_ports;
1346 	}
1347 
1348 	/* set the initial group capabilities */
1349 	aggr_grp_capab_set(grp);
1350 
1351 	if ((mac = mac_alloc(MAC_VERSION)) == NULL) {
1352 		err = ENOMEM;
1353 		goto bail;
1354 	}
1355 	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1356 	mac->m_driver = grp;
1357 	mac->m_dip = aggr_dip;
1358 	mac->m_instance = grp->lg_key > AGGR_MAX_KEY ? (uint_t)-1 : grp->lg_key;
1359 	mac->m_src_addr = grp->lg_addr;
1360 	mac->m_callbacks = &aggr_m_callbacks;
1361 	mac->m_min_sdu = 0;
1362 	mac->m_max_sdu = grp->lg_max_sdu = aggr_grp_max_sdu(grp);
1363 	mac->m_margin = aggr_grp_max_margin(grp);
1364 	mac->m_v12n = MAC_VIRT_LEVEL1;
1365 	err = mac_register(mac, &grp->lg_mh);
1366 	mac_free(mac);
1367 	if (err != 0)
1368 		goto bail;
1369 
1370 	err = dls_devnet_create(grp->lg_mh, grp->lg_linkid, crgetzoneid(credp));
1371 	if (err != 0) {
1372 		(void) mac_unregister(grp->lg_mh);
1373 		grp->lg_mh = NULL;
1374 		goto bail;
1375 	}
1376 
1377 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1378 
1379 	/*
1380 	 * Update the MAC address of the constituent ports.
1381 	 * None of the port is attached at this time, the link state of the
1382 	 * aggregation will not change.
1383 	 */
1384 	link_state_changed = aggr_grp_update_ports_mac(grp);
1385 	ASSERT(!link_state_changed);
1386 
1387 	/* update outbound load balancing policy */
1388 	aggr_send_update_policy(grp, policy);
1389 
1390 	/* set LACP mode */
1391 	aggr_lacp_set_mode(grp, lacp_mode, lacp_timer);
1392 
1393 	/*
1394 	 * Attach each port if necessary.
1395 	 */
1396 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1397 		/*
1398 		 * Create the pseudo ring for each HW ring of the underlying
1399 		 * port. Note that this is done after the aggr registers the
1400 		 * mac.
1401 		 */
1402 		VERIFY(aggr_add_pseudo_tx_group(port, &grp->lg_tx_group) == 0);
1403 		VERIFY(aggr_add_pseudo_rx_group(port, &grp->lg_rx_group) == 0);
1404 		if (aggr_port_notify_link(grp, port))
1405 			link_state_changed = B_TRUE;
1406 
1407 		/*
1408 		 * Initialize the callback functions for this port.
1409 		 */
1410 		aggr_port_init_callbacks(port);
1411 	}
1412 
1413 	if (link_state_changed)
1414 		mac_link_update(grp->lg_mh, grp->lg_link_state);
1415 
1416 	/* add new group to hash table */
1417 	err = mod_hash_insert(aggr_grp_hash, GRP_HASH_KEY(linkid),
1418 	    (mod_hash_val_t)grp);
1419 	ASSERT(err == 0);
1420 	aggr_grp_cnt++;
1421 
1422 	mac_perim_exit(mph);
1423 	rw_exit(&aggr_grp_lock);
1424 	return (0);
1425 
1426 bail:
1427 
1428 	grp->lg_closing = B_TRUE;
1429 
1430 	port = grp->lg_ports;
1431 	while (port != NULL) {
1432 		aggr_port_t *cport;
1433 
1434 		cport = port->lp_next;
1435 		aggr_port_delete(port);
1436 		port = cport;
1437 	}
1438 
1439 	/*
1440 	 * Inform the lacp_rx thread to exit.
1441 	 */
1442 	mutex_enter(&grp->lg_lacp_lock);
1443 	grp->lg_lacp_done = B_TRUE;
1444 	cv_signal(&grp->lg_lacp_cv);
1445 	while (grp->lg_lacp_rx_thread != NULL)
1446 		cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
1447 	mutex_exit(&grp->lg_lacp_lock);
1448 	/*
1449 	 * Inform the tx_notify thread to exit.
1450 	 */
1451 	mutex_enter(&grp->lg_tx_flowctl_lock);
1452 	if (grp->lg_tx_notify_thread != NULL) {
1453 		tid = grp->lg_tx_notify_thread->t_did;
1454 		grp->lg_tx_notify_done = B_TRUE;
1455 		cv_signal(&grp->lg_tx_flowctl_cv);
1456 	}
1457 	mutex_exit(&grp->lg_tx_flowctl_lock);
1458 	if (tid != 0)
1459 		thread_join(tid);
1460 
1461 	kmem_free(grp->lg_tx_blocked_rings,
1462 	    (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP));
1463 	rw_exit(&aggr_grp_lock);
1464 	AGGR_GRP_REFRELE(grp);
1465 	return (err);
1466 }
1467 
1468 /*
1469  * Return a pointer to the member of a group with specified linkid.
1470  */
1471 static aggr_port_t *
1472 aggr_grp_port_lookup(aggr_grp_t *grp, datalink_id_t linkid)
1473 {
1474 	aggr_port_t *port;
1475 
1476 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1477 
1478 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1479 		if (port->lp_linkid == linkid)
1480 			break;
1481 	}
1482 
1483 	return (port);
1484 }
1485 
1486 /*
1487  * Stop, detach and remove a port from a link aggregation group.
1488  */
1489 static int
1490 aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port,
1491     boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
1492 {
1493 	int rc = 0;
1494 	aggr_port_t **pport;
1495 	boolean_t mac_addr_changed = B_FALSE;
1496 	boolean_t link_state_changed = B_FALSE;
1497 	mac_perim_handle_t mph;
1498 	uint64_t val;
1499 	uint_t i;
1500 	uint_t stat;
1501 
1502 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1503 	ASSERT(grp->lg_nports > 1);
1504 	ASSERT(!grp->lg_closing);
1505 
1506 	/* unlink port */
1507 	for (pport = &grp->lg_ports; *pport != port;
1508 	    pport = &(*pport)->lp_next) {
1509 		if (*pport == NULL) {
1510 			rc = ENOENT;
1511 			goto done;
1512 		}
1513 	}
1514 	*pport = port->lp_next;
1515 
1516 	mac_perim_enter_by_mh(port->lp_mh, &mph);
1517 
1518 	/*
1519 	 * If the MAC address of the port being removed was assigned
1520 	 * to the group, update the group MAC address
1521 	 * using the MAC address of a different port.
1522 	 */
1523 	if (!grp->lg_addr_fixed && grp->lg_mac_addr_port == port) {
1524 		/*
1525 		 * Set the MAC address of the group to the
1526 		 * MAC address of its first port.
1527 		 */
1528 		bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
1529 		grp->lg_mac_addr_port = grp->lg_ports;
1530 		mac_addr_changed = B_TRUE;
1531 	}
1532 
1533 	link_state_changed = aggr_grp_detach_port(grp, port);
1534 
1535 	/*
1536 	 * Add the counter statistics of the ports while it was aggregated
1537 	 * to the group's residual statistics.  This is done by obtaining
1538 	 * the current counter from the underlying MAC then subtracting the
1539 	 * value of the counter at the moment it was added to the
1540 	 * aggregation.
1541 	 */
1542 	for (i = 0; i < MAC_NSTAT; i++) {
1543 		stat = i + MAC_STAT_MIN;
1544 		if (!MAC_STAT_ISACOUNTER(stat))
1545 			continue;
1546 		val = aggr_port_stat(port, stat);
1547 		val -= port->lp_stat[i];
1548 		grp->lg_stat[i] += val;
1549 	}
1550 	for (i = 0; i < ETHER_NSTAT; i++) {
1551 		stat = i + MACTYPE_STAT_MIN;
1552 		if (!ETHER_STAT_ISACOUNTER(stat))
1553 			continue;
1554 		val = aggr_port_stat(port, stat);
1555 		val -= port->lp_ether_stat[i];
1556 		grp->lg_ether_stat[i] += val;
1557 	}
1558 
1559 	grp->lg_nports--;
1560 	mac_perim_exit(mph);
1561 
1562 	aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group);
1563 	aggr_port_delete(port);
1564 
1565 	/*
1566 	 * If the group MAC address has changed, update the MAC address of
1567 	 * the remaining constituent ports according to the new MAC
1568 	 * address of the group.
1569 	 */
1570 	if (mac_addr_changed && aggr_grp_update_ports_mac(grp))
1571 		link_state_changed = B_TRUE;
1572 
1573 done:
1574 	if (mac_addr_changedp != NULL)
1575 		*mac_addr_changedp = mac_addr_changed;
1576 	if (link_state_changedp != NULL)
1577 		*link_state_changedp = link_state_changed;
1578 
1579 	return (rc);
1580 }
1581 
1582 /*
1583  * Remove one or more ports from an existing link aggregation group.
1584  */
1585 int
1586 aggr_grp_rem_ports(datalink_id_t linkid, uint_t nports, laioc_port_t *ports)
1587 {
1588 	int rc = 0, i;
1589 	aggr_grp_t *grp = NULL;
1590 	aggr_port_t *port;
1591 	boolean_t mac_addr_update = B_FALSE, mac_addr_changed;
1592 	boolean_t link_state_update = B_FALSE, link_state_changed;
1593 	mac_perim_handle_t mph, pmph;
1594 
1595 	/* get group corresponding to linkid */
1596 	rw_enter(&aggr_grp_lock, RW_READER);
1597 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1598 	    (mod_hash_val_t *)&grp) != 0) {
1599 		rw_exit(&aggr_grp_lock);
1600 		return (ENOENT);
1601 	}
1602 	AGGR_GRP_REFHOLD(grp);
1603 
1604 	/*
1605 	 * Hold the perimeter so that the aggregation won't be destroyed.
1606 	 */
1607 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1608 	rw_exit(&aggr_grp_lock);
1609 
1610 	/* we need to keep at least one port per group */
1611 	if (nports >= grp->lg_nports) {
1612 		rc = EINVAL;
1613 		goto bail;
1614 	}
1615 
1616 	/* first verify that all the groups are valid */
1617 	for (i = 0; i < nports; i++) {
1618 		if (aggr_grp_port_lookup(grp, ports[i].lp_linkid) == NULL) {
1619 			/* port not found */
1620 			rc = ENOENT;
1621 			goto bail;
1622 		}
1623 	}
1624 
1625 	/* clear the promiscous mode for the specified ports */
1626 	for (i = 0; i < nports && rc == 0; i++) {
1627 		/* lookup port */
1628 		port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
1629 		ASSERT(port != NULL);
1630 
1631 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
1632 		rc = aggr_port_promisc(port, B_FALSE);
1633 		mac_perim_exit(pmph);
1634 	}
1635 	if (rc != 0) {
1636 		for (i = 0; i < nports; i++) {
1637 			port = aggr_grp_port_lookup(grp,
1638 			    ports[i].lp_linkid);
1639 			ASSERT(port != NULL);
1640 
1641 			/*
1642 			 * Turn the promiscuous mode back on if it is required
1643 			 * to receive the non-primary address over a port, or
1644 			 * the promiscous mode is enabled over the aggr.
1645 			 */
1646 			mac_perim_enter_by_mh(port->lp_mh, &pmph);
1647 			if (port->lp_started && (grp->lg_promisc ||
1648 			    port->lp_prom_addr != NULL)) {
1649 				(void) aggr_port_promisc(port, B_TRUE);
1650 			}
1651 			mac_perim_exit(pmph);
1652 		}
1653 		goto bail;
1654 	}
1655 
1656 	/* remove the specified ports from group */
1657 	for (i = 0; i < nports; i++) {
1658 		/* lookup port */
1659 		port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
1660 		ASSERT(port != NULL);
1661 
1662 		/* stop port if group has already been started */
1663 		if (grp->lg_started) {
1664 			mac_perim_enter_by_mh(port->lp_mh, &pmph);
1665 			aggr_port_stop(port);
1666 			mac_perim_exit(pmph);
1667 		}
1668 
1669 		/*
1670 		 * aggr_rem_pseudo_tx_group() is not called here. Instead
1671 		 * it is called from inside aggr_grp_rem_port() after the
1672 		 * port has been detached. The reason is that
1673 		 * aggr_rem_pseudo_tx_group() removes one ring at a time
1674 		 * and if there is still traffic going on, then there
1675 		 * is the possibility of aggr_find_tx_ring() returning a
1676 		 * removed ring for transmission. Once the port has been
1677 		 * detached, that port will not be used and
1678 		 * aggr_find_tx_ring() will not return any rings
1679 		 * belonging to it.
1680 		 */
1681 		aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
1682 
1683 		/* remove port from group */
1684 		rc = aggr_grp_rem_port(grp, port, &mac_addr_changed,
1685 		    &link_state_changed);
1686 		ASSERT(rc == 0);
1687 		mac_addr_update = mac_addr_update || mac_addr_changed;
1688 		link_state_update = link_state_update || link_state_changed;
1689 	}
1690 
1691 bail:
1692 	if (mac_addr_update)
1693 		mac_unicst_update(grp->lg_mh, grp->lg_addr);
1694 	if (link_state_update)
1695 		mac_link_update(grp->lg_mh, grp->lg_link_state);
1696 
1697 	mac_perim_exit(mph);
1698 	AGGR_GRP_REFRELE(grp);
1699 
1700 	return (rc);
1701 }
1702 
1703 int
1704 aggr_grp_delete(datalink_id_t linkid, cred_t *cred)
1705 {
1706 	aggr_grp_t *grp = NULL;
1707 	aggr_port_t *port, *cport;
1708 	datalink_id_t tmpid;
1709 	mod_hash_val_t val;
1710 	mac_perim_handle_t mph, pmph;
1711 	int err;
1712 	kt_did_t tid = 0;
1713 
1714 	rw_enter(&aggr_grp_lock, RW_WRITER);
1715 
1716 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1717 	    (mod_hash_val_t *)&grp) != 0) {
1718 		rw_exit(&aggr_grp_lock);
1719 		return (ENOENT);
1720 	}
1721 
1722 	/*
1723 	 * Note that dls_devnet_destroy() must be called before lg_lock is
1724 	 * held. Otherwise, it will deadlock if another thread is in
1725 	 * aggr_m_stat() and thus has a kstat_hold() on the kstats that
1726 	 * dls_devnet_destroy() needs to delete.
1727 	 */
1728 	if ((err = dls_devnet_destroy(grp->lg_mh, &tmpid, B_TRUE)) != 0) {
1729 		rw_exit(&aggr_grp_lock);
1730 		return (err);
1731 	}
1732 	ASSERT(linkid == tmpid);
1733 
1734 	/*
1735 	 * Unregister from the MAC service module. Since this can
1736 	 * fail if a client hasn't closed the MAC port, we gracefully
1737 	 * fail the operation.
1738 	 */
1739 	if ((err = mac_disable(grp->lg_mh)) != 0) {
1740 		(void) dls_devnet_create(grp->lg_mh, linkid, crgetzoneid(cred));
1741 		rw_exit(&aggr_grp_lock);
1742 		return (err);
1743 	}
1744 	(void) mod_hash_remove(aggr_grp_hash, GRP_HASH_KEY(linkid), &val);
1745 	ASSERT(grp == (aggr_grp_t *)val);
1746 
1747 	ASSERT(aggr_grp_cnt > 0);
1748 	aggr_grp_cnt--;
1749 	rw_exit(&aggr_grp_lock);
1750 
1751 	/*
1752 	 * Inform the lacp_rx thread to exit.
1753 	 */
1754 	mutex_enter(&grp->lg_lacp_lock);
1755 	grp->lg_lacp_done = B_TRUE;
1756 	cv_signal(&grp->lg_lacp_cv);
1757 	while (grp->lg_lacp_rx_thread != NULL)
1758 		cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
1759 	mutex_exit(&grp->lg_lacp_lock);
1760 	/*
1761 	 * Inform the tx_notify_thread to exit.
1762 	 */
1763 	mutex_enter(&grp->lg_tx_flowctl_lock);
1764 	if (grp->lg_tx_notify_thread != NULL) {
1765 		tid = grp->lg_tx_notify_thread->t_did;
1766 		grp->lg_tx_notify_done = B_TRUE;
1767 		cv_signal(&grp->lg_tx_flowctl_cv);
1768 	}
1769 	mutex_exit(&grp->lg_tx_flowctl_lock);
1770 	if (tid != 0)
1771 		thread_join(tid);
1772 
1773 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1774 
1775 	grp->lg_closing = B_TRUE;
1776 	/* detach and free MAC ports associated with group */
1777 	port = grp->lg_ports;
1778 	while (port != NULL) {
1779 		cport = port->lp_next;
1780 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
1781 		if (grp->lg_started)
1782 			aggr_port_stop(port);
1783 		(void) aggr_grp_detach_port(grp, port);
1784 		mac_perim_exit(pmph);
1785 		aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group);
1786 		aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
1787 		aggr_port_delete(port);
1788 		port = cport;
1789 	}
1790 
1791 	mac_perim_exit(mph);
1792 
1793 	kmem_free(grp->lg_tx_blocked_rings,
1794 	    (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP));
1795 	/*
1796 	 * Wait for the port's lacp timer thread and its notification callback
1797 	 * to exit before calling mac_unregister() since both needs to access
1798 	 * the mac perimeter of the grp.
1799 	 */
1800 	aggr_grp_port_wait(grp);
1801 
1802 	VERIFY(mac_unregister(grp->lg_mh) == 0);
1803 	grp->lg_mh = NULL;
1804 
1805 	AGGR_GRP_REFRELE(grp);
1806 	return (0);
1807 }
1808 
1809 void
1810 aggr_grp_free(aggr_grp_t *grp)
1811 {
1812 	ASSERT(grp->lg_refs == 0);
1813 	ASSERT(grp->lg_port_ref == 0);
1814 	if (grp->lg_key > AGGR_MAX_KEY) {
1815 		id_free(key_ids, grp->lg_key);
1816 		grp->lg_key = 0;
1817 	}
1818 	kmem_cache_free(aggr_grp_cache, grp);
1819 }
1820 
1821 int
1822 aggr_grp_info(datalink_id_t linkid, void *fn_arg,
1823     aggr_grp_info_new_grp_fn_t new_grp_fn,
1824     aggr_grp_info_new_port_fn_t new_port_fn, cred_t *cred)
1825 {
1826 	aggr_grp_t	*grp;
1827 	aggr_port_t	*port;
1828 	mac_perim_handle_t mph, pmph;
1829 	int		rc = 0;
1830 
1831 	/*
1832 	 * Make sure that the aggregation link is visible from the caller's
1833 	 * zone.
1834 	 */
1835 	if (!dls_devnet_islinkvisible(linkid, crgetzoneid(cred)))
1836 		return (ENOENT);
1837 
1838 	rw_enter(&aggr_grp_lock, RW_READER);
1839 
1840 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1841 	    (mod_hash_val_t *)&grp) != 0) {
1842 		rw_exit(&aggr_grp_lock);
1843 		return (ENOENT);
1844 	}
1845 	AGGR_GRP_REFHOLD(grp);
1846 
1847 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1848 	rw_exit(&aggr_grp_lock);
1849 
1850 	rc = new_grp_fn(fn_arg, grp->lg_linkid,
1851 	    (grp->lg_key > AGGR_MAX_KEY) ? 0 : grp->lg_key, grp->lg_addr,
1852 	    grp->lg_addr_fixed, grp->lg_force, grp->lg_tx_policy,
1853 	    grp->lg_nports, grp->lg_lacp_mode, grp->aggr.PeriodicTimer);
1854 
1855 	if (rc != 0)
1856 		goto bail;
1857 
1858 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1859 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
1860 		rc = new_port_fn(fn_arg, port->lp_linkid, port->lp_addr,
1861 		    port->lp_state, &port->lp_lacp.ActorOperPortState);
1862 		mac_perim_exit(pmph);
1863 
1864 		if (rc != 0)
1865 			goto bail;
1866 	}
1867 
1868 bail:
1869 	mac_perim_exit(mph);
1870 	AGGR_GRP_REFRELE(grp);
1871 	return (rc);
1872 }
1873 
1874 /*ARGSUSED*/
1875 static void
1876 aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
1877 {
1878 	miocnak(q, mp, 0, ENOTSUP);
1879 }
1880 
1881 static int
1882 aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val)
1883 {
1884 	aggr_port_t	*port;
1885 	uint_t		stat_index;
1886 
1887 	/* We only aggregate counter statistics. */
1888 	if (IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat) ||
1889 	    IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat)) {
1890 		return (ENOTSUP);
1891 	}
1892 
1893 	/*
1894 	 * Counter statistics for a group are computed by aggregating the
1895 	 * counters of the members MACs while they were aggregated, plus
1896 	 * the residual counter of the group itself, which is updated each
1897 	 * time a MAC is removed from the group.
1898 	 */
1899 	*val = 0;
1900 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1901 		/* actual port statistic */
1902 		*val += aggr_port_stat(port, stat);
1903 		/*
1904 		 * minus the port stat when it was added, plus any residual
1905 		 * amount for the group.
1906 		 */
1907 		if (IS_MAC_STAT(stat)) {
1908 			stat_index = stat - MAC_STAT_MIN;
1909 			*val -= port->lp_stat[stat_index];
1910 			*val += grp->lg_stat[stat_index];
1911 		} else if (IS_MACTYPE_STAT(stat)) {
1912 			stat_index = stat - MACTYPE_STAT_MIN;
1913 			*val -= port->lp_ether_stat[stat_index];
1914 			*val += grp->lg_ether_stat[stat_index];
1915 		}
1916 	}
1917 	return (0);
1918 }
1919 
1920 int
1921 aggr_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
1922 {
1923 	aggr_pseudo_rx_ring_t   *rx_ring = (aggr_pseudo_rx_ring_t *)rdriver;
1924 
1925 	if (rx_ring->arr_hw_rh != NULL) {
1926 		*val = mac_pseudo_rx_ring_stat_get(rx_ring->arr_hw_rh, stat);
1927 	} else {
1928 		aggr_port_t	*port = rx_ring->arr_port;
1929 
1930 		*val = mac_stat_get(port->lp_mh, stat);
1931 
1932 	}
1933 	return (0);
1934 }
1935 
1936 int
1937 aggr_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val)
1938 {
1939 	aggr_pseudo_tx_ring_t   *tx_ring = (aggr_pseudo_tx_ring_t *)rdriver;
1940 
1941 	if (tx_ring->atr_hw_rh != NULL) {
1942 		*val = mac_pseudo_tx_ring_stat_get(tx_ring->atr_hw_rh, stat);
1943 	} else {
1944 		aggr_port_t	*port = tx_ring->atr_port;
1945 
1946 		*val = mac_stat_get(port->lp_mh, stat);
1947 	}
1948 	return (0);
1949 }
1950 
1951 static int
1952 aggr_m_stat(void *arg, uint_t stat, uint64_t *val)
1953 {
1954 	aggr_grp_t		*grp = arg;
1955 	mac_perim_handle_t	mph;
1956 	int			rval = 0;
1957 
1958 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1959 
1960 	switch (stat) {
1961 	case MAC_STAT_IFSPEED:
1962 		*val = grp->lg_ifspeed;
1963 		break;
1964 
1965 	case ETHER_STAT_LINK_DUPLEX:
1966 		*val = grp->lg_link_duplex;
1967 		break;
1968 
1969 	default:
1970 		/*
1971 		 * For all other statistics, we return the aggregated stat
1972 		 * from the underlying ports.  aggr_grp_stat() will set
1973 		 * rval appropriately if the statistic isn't a counter.
1974 		 */
1975 		rval = aggr_grp_stat(grp, stat, val);
1976 	}
1977 
1978 	mac_perim_exit(mph);
1979 	return (rval);
1980 }
1981 
1982 static int
1983 aggr_m_start(void *arg)
1984 {
1985 	aggr_grp_t *grp = arg;
1986 	aggr_port_t *port;
1987 	mac_perim_handle_t mph, pmph;
1988 
1989 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1990 
1991 	/*
1992 	 * Attempts to start all configured members of the group.
1993 	 * Group members will be attached when their link-up notification
1994 	 * is received.
1995 	 */
1996 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1997 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
1998 		if (aggr_port_start(port) != 0) {
1999 			mac_perim_exit(pmph);
2000 			continue;
2001 		}
2002 
2003 		/*
2004 		 * Turn on the promiscuous mode if it is required to receive
2005 		 * the non-primary address over a port, or the promiscous
2006 		 * mode is enabled over the aggr.
2007 		 */
2008 		if (grp->lg_promisc || port->lp_prom_addr != NULL) {
2009 			if (aggr_port_promisc(port, B_TRUE) != 0)
2010 				aggr_port_stop(port);
2011 		}
2012 		mac_perim_exit(pmph);
2013 	}
2014 
2015 	grp->lg_started = B_TRUE;
2016 
2017 	mac_perim_exit(mph);
2018 	return (0);
2019 }
2020 
2021 static void
2022 aggr_m_stop(void *arg)
2023 {
2024 	aggr_grp_t *grp = arg;
2025 	aggr_port_t *port;
2026 	mac_perim_handle_t mph, pmph;
2027 
2028 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
2029 
2030 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2031 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
2032 
2033 		/* reset port promiscuous mode */
2034 		(void) aggr_port_promisc(port, B_FALSE);
2035 
2036 		aggr_port_stop(port);
2037 		mac_perim_exit(pmph);
2038 	}
2039 
2040 	grp->lg_started = B_FALSE;
2041 	mac_perim_exit(mph);
2042 }
2043 
2044 static int
2045 aggr_m_promisc(void *arg, boolean_t on)
2046 {
2047 	aggr_grp_t *grp = arg;
2048 	aggr_port_t *port;
2049 	boolean_t link_state_changed = B_FALSE;
2050 	mac_perim_handle_t mph, pmph;
2051 
2052 	AGGR_GRP_REFHOLD(grp);
2053 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
2054 
2055 	ASSERT(!grp->lg_closing);
2056 
2057 	if (on == grp->lg_promisc)
2058 		goto bail;
2059 
2060 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2061 		int	err = 0;
2062 
2063 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
2064 		AGGR_PORT_REFHOLD(port);
2065 		if (!on && (port->lp_prom_addr == NULL))
2066 			err = aggr_port_promisc(port, B_FALSE);
2067 		else if (on && port->lp_started)
2068 			err = aggr_port_promisc(port, B_TRUE);
2069 
2070 		if (err != 0) {
2071 			if (aggr_grp_detach_port(grp, port))
2072 				link_state_changed = B_TRUE;
2073 		} else {
2074 			/*
2075 			 * If a port was detached because of a previous
2076 			 * failure changing the promiscuity, the port
2077 			 * is reattached when it successfully changes
2078 			 * the promiscuity now, and this might cause
2079 			 * the link state of the aggregation to change.
2080 			 */
2081 			if (aggr_grp_attach_port(grp, port))
2082 				link_state_changed = B_TRUE;
2083 		}
2084 		mac_perim_exit(pmph);
2085 		AGGR_PORT_REFRELE(port);
2086 	}
2087 
2088 	grp->lg_promisc = on;
2089 
2090 	if (link_state_changed)
2091 		mac_link_update(grp->lg_mh, grp->lg_link_state);
2092 
2093 bail:
2094 	mac_perim_exit(mph);
2095 	AGGR_GRP_REFRELE(grp);
2096 
2097 	return (0);
2098 }
2099 
2100 static void
2101 aggr_grp_port_rename(const char *new_name, void *arg)
2102 {
2103 	/*
2104 	 * aggr port's mac client name is the format of "aggr link name" plus
2105 	 * AGGR_PORT_NAME_DELIMIT plus "underneath link name".
2106 	 */
2107 	int aggr_len, link_len, clnt_name_len, i;
2108 	char *str_end, *str_st, *str_del;
2109 	char aggr_name[MAXNAMELEN];
2110 	char link_name[MAXNAMELEN];
2111 	char *clnt_name;
2112 	aggr_grp_t *aggr_grp = arg;
2113 	aggr_port_t *aggr_port = aggr_grp->lg_ports;
2114 
2115 	for (i = 0; i < aggr_grp->lg_nports; i++) {
2116 		clnt_name = mac_client_name(aggr_port->lp_mch);
2117 		clnt_name_len = strlen(clnt_name);
2118 		str_st = clnt_name;
2119 		str_end = &(clnt_name[clnt_name_len]);
2120 		str_del = strchr(str_st, AGGR_PORT_NAME_DELIMIT);
2121 		ASSERT(str_del != NULL);
2122 		aggr_len = (intptr_t)((uintptr_t)str_del - (uintptr_t)str_st);
2123 		link_len = (intptr_t)((uintptr_t)str_end - (uintptr_t)str_del);
2124 		bzero(aggr_name, MAXNAMELEN);
2125 		bzero(link_name, MAXNAMELEN);
2126 		bcopy(clnt_name, aggr_name, aggr_len);
2127 		bcopy(str_del, link_name, link_len + 1);
2128 		bzero(clnt_name, MAXNAMELEN);
2129 		(void) snprintf(clnt_name, MAXNAMELEN, "%s%s", new_name,
2130 		    link_name);
2131 
2132 		(void) mac_rename_primary(aggr_port->lp_mh, NULL);
2133 		aggr_port = aggr_port->lp_next;
2134 	}
2135 }
2136 
2137 /*
2138  * Initialize the capabilities that are advertised for the group
2139  * according to the capabilities of the constituent ports.
2140  */
2141 static boolean_t
2142 aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
2143 {
2144 	aggr_grp_t *grp = arg;
2145 
2146 	switch (cap) {
2147 	case MAC_CAPAB_HCKSUM: {
2148 		uint32_t *hcksum_txflags = cap_data;
2149 		*hcksum_txflags = grp->lg_hcksum_txflags;
2150 		break;
2151 	}
2152 	case MAC_CAPAB_LSO: {
2153 		mac_capab_lso_t *cap_lso = cap_data;
2154 
2155 		if (grp->lg_lso) {
2156 			*cap_lso = grp->lg_cap_lso;
2157 			break;
2158 		} else {
2159 			return (B_FALSE);
2160 		}
2161 	}
2162 	case MAC_CAPAB_NO_NATIVEVLAN:
2163 		return (!grp->lg_vlan);
2164 	case MAC_CAPAB_NO_ZCOPY:
2165 		return (!grp->lg_zcopy);
2166 	case MAC_CAPAB_RINGS: {
2167 		mac_capab_rings_t *cap_rings = cap_data;
2168 
2169 		if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
2170 			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
2171 			cap_rings->mr_rnum = grp->lg_rx_group.arg_ring_cnt;
2172 
2173 			/*
2174 			 * An aggregation advertises only one (pseudo) RX
2175 			 * group, which virtualizes the main/primary group of
2176 			 * the underlying devices.
2177 			 */
2178 			cap_rings->mr_gnum = 1;
2179 			cap_rings->mr_gaddring = NULL;
2180 			cap_rings->mr_gremring = NULL;
2181 		} else {
2182 			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
2183 			cap_rings->mr_rnum = grp->lg_tx_group.atg_ring_cnt;
2184 			cap_rings->mr_gnum = 0;
2185 		}
2186 		cap_rings->mr_rget = aggr_fill_ring;
2187 		cap_rings->mr_gget = aggr_fill_group;
2188 		break;
2189 	}
2190 	case MAC_CAPAB_AGGR:
2191 	{
2192 		mac_capab_aggr_t *aggr_cap;
2193 
2194 		if (cap_data != NULL) {
2195 			aggr_cap = cap_data;
2196 			aggr_cap->mca_rename_fn = aggr_grp_port_rename;
2197 			aggr_cap->mca_unicst = aggr_m_unicst;
2198 			aggr_cap->mca_find_tx_ring_fn = aggr_find_tx_ring;
2199 			aggr_cap->mca_arg = arg;
2200 		}
2201 		return (B_TRUE);
2202 	}
2203 	default:
2204 		return (B_FALSE);
2205 	}
2206 	return (B_TRUE);
2207 }
2208 
2209 /*
2210  * Callback funtion for MAC layer to register groups.
2211  */
2212 static void
2213 aggr_fill_group(void *arg, mac_ring_type_t rtype, const int index,
2214     mac_group_info_t *infop, mac_group_handle_t gh)
2215 {
2216 	aggr_grp_t *grp = arg;
2217 	aggr_pseudo_rx_group_t *rx_group;
2218 	aggr_pseudo_tx_group_t *tx_group;
2219 
2220 	ASSERT(index == 0);
2221 	if (rtype == MAC_RING_TYPE_RX) {
2222 		rx_group = &grp->lg_rx_group;
2223 		rx_group->arg_gh = gh;
2224 		rx_group->arg_grp = grp;
2225 
2226 		infop->mgi_driver = (mac_group_driver_t)rx_group;
2227 		infop->mgi_start = NULL;
2228 		infop->mgi_stop = NULL;
2229 		infop->mgi_addmac = aggr_addmac;
2230 		infop->mgi_remmac = aggr_remmac;
2231 		infop->mgi_count = rx_group->arg_ring_cnt;
2232 	} else {
2233 		tx_group = &grp->lg_tx_group;
2234 		tx_group->atg_gh = gh;
2235 	}
2236 }
2237 
2238 /*
2239  * Callback funtion for MAC layer to register all rings.
2240  */
2241 static void
2242 aggr_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index,
2243     const int index, mac_ring_info_t *infop, mac_ring_handle_t rh)
2244 {
2245 	aggr_grp_t	*grp = arg;
2246 
2247 	switch (rtype) {
2248 	case MAC_RING_TYPE_RX: {
2249 		aggr_pseudo_rx_group_t	*rx_group = &grp->lg_rx_group;
2250 		aggr_pseudo_rx_ring_t	*rx_ring;
2251 		mac_intr_t		aggr_mac_intr;
2252 
2253 		ASSERT(rg_index == 0);
2254 
2255 		ASSERT((index >= 0) && (index < rx_group->arg_ring_cnt));
2256 		rx_ring = rx_group->arg_rings + index;
2257 		rx_ring->arr_rh = rh;
2258 
2259 		/*
2260 		 * Entrypoint to enable interrupt (disable poll) and
2261 		 * disable interrupt (enable poll).
2262 		 */
2263 		aggr_mac_intr.mi_handle = (mac_intr_handle_t)rx_ring;
2264 		aggr_mac_intr.mi_enable = aggr_pseudo_enable_intr;
2265 		aggr_mac_intr.mi_disable = aggr_pseudo_disable_intr;
2266 		aggr_mac_intr.mi_ddi_handle = NULL;
2267 
2268 		infop->mri_driver = (mac_ring_driver_t)rx_ring;
2269 		infop->mri_start = aggr_pseudo_start_ring;
2270 		infop->mri_stop = NULL;
2271 
2272 		infop->mri_intr = aggr_mac_intr;
2273 		infop->mri_poll = aggr_rx_poll;
2274 
2275 		infop->mri_stat = aggr_rx_ring_stat;
2276 		break;
2277 	}
2278 	case MAC_RING_TYPE_TX: {
2279 		aggr_pseudo_tx_group_t	*tx_group = &grp->lg_tx_group;
2280 		aggr_pseudo_tx_ring_t	*tx_ring;
2281 
2282 		ASSERT(rg_index == -1);
2283 		ASSERT(index < tx_group->atg_ring_cnt);
2284 
2285 		tx_ring = &tx_group->atg_rings[index];
2286 		tx_ring->atr_rh = rh;
2287 
2288 		infop->mri_driver = (mac_ring_driver_t)tx_ring;
2289 		infop->mri_start = NULL;
2290 		infop->mri_stop = NULL;
2291 		infop->mri_tx = aggr_ring_tx;
2292 		infop->mri_stat = aggr_tx_ring_stat;
2293 		/*
2294 		 * Use the hw TX ring handle to find if the ring needs
2295 		 * serialization or not. For NICs that do not expose
2296 		 * Tx rings, atr_hw_rh will be NULL.
2297 		 */
2298 		if (tx_ring->atr_hw_rh != NULL) {
2299 			infop->mri_flags =
2300 			    mac_hwring_getinfo(tx_ring->atr_hw_rh);
2301 		}
2302 		break;
2303 	}
2304 	default:
2305 		break;
2306 	}
2307 }
2308 
2309 static mblk_t *
2310 aggr_rx_poll(void *arg, int bytes_to_pickup)
2311 {
2312 	aggr_pseudo_rx_ring_t *rr_ring = arg;
2313 	aggr_port_t *port = rr_ring->arr_port;
2314 	aggr_grp_t *grp = port->lp_grp;
2315 	mblk_t *mp_chain, *mp, **mpp;
2316 
2317 	mp_chain = mac_hwring_poll(rr_ring->arr_hw_rh, bytes_to_pickup);
2318 
2319 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
2320 		return (mp_chain);
2321 
2322 	mpp = &mp_chain;
2323 	while ((mp = *mpp) != NULL) {
2324 		if (MBLKL(mp) >= sizeof (struct ether_header)) {
2325 			struct ether_header *ehp;
2326 
2327 			ehp = (struct ether_header *)mp->b_rptr;
2328 			if (ntohs(ehp->ether_type) == ETHERTYPE_SLOW) {
2329 				*mpp = mp->b_next;
2330 				mp->b_next = NULL;
2331 				aggr_recv_lacp(port,
2332 				    (mac_resource_handle_t)rr_ring, mp);
2333 				continue;
2334 			}
2335 		}
2336 
2337 		if (!port->lp_collector_enabled) {
2338 			*mpp = mp->b_next;
2339 			mp->b_next = NULL;
2340 			freemsg(mp);
2341 			continue;
2342 		}
2343 		mpp = &mp->b_next;
2344 	}
2345 	return (mp_chain);
2346 }
2347 
2348 static int
2349 aggr_addmac(void *arg, const uint8_t *mac_addr)
2350 {
2351 	aggr_pseudo_rx_group_t	*rx_group = (aggr_pseudo_rx_group_t *)arg;
2352 	aggr_unicst_addr_t	*addr, **pprev;
2353 	aggr_grp_t		*grp = rx_group->arg_grp;
2354 	aggr_port_t		*port, *p;
2355 	mac_perim_handle_t	mph;
2356 	int			err = 0;
2357 
2358 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
2359 
2360 	if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) {
2361 		mac_perim_exit(mph);
2362 		return (0);
2363 	}
2364 
2365 	/*
2366 	 * Insert this mac address into the list of mac addresses owned by
2367 	 * the aggregation pseudo group.
2368 	 */
2369 	pprev = &rx_group->arg_macaddr;
2370 	while ((addr = *pprev) != NULL) {
2371 		if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) == 0) {
2372 			mac_perim_exit(mph);
2373 			return (EEXIST);
2374 		}
2375 		pprev = &addr->aua_next;
2376 	}
2377 	addr = kmem_alloc(sizeof (aggr_unicst_addr_t), KM_SLEEP);
2378 	bcopy(mac_addr, addr->aua_addr, ETHERADDRL);
2379 	addr->aua_next = NULL;
2380 	*pprev = addr;
2381 
2382 	for (port = grp->lg_ports; port != NULL; port = port->lp_next)
2383 		if ((err = aggr_port_addmac(port, mac_addr)) != 0)
2384 			break;
2385 
2386 	if (err != 0) {
2387 		for (p = grp->lg_ports; p != port; p = p->lp_next)
2388 			aggr_port_remmac(p, mac_addr);
2389 
2390 		*pprev = NULL;
2391 		kmem_free(addr, sizeof (aggr_unicst_addr_t));
2392 	}
2393 
2394 	mac_perim_exit(mph);
2395 	return (err);
2396 }
2397 
2398 static int
2399 aggr_remmac(void *arg, const uint8_t *mac_addr)
2400 {
2401 	aggr_pseudo_rx_group_t	*rx_group = (aggr_pseudo_rx_group_t *)arg;
2402 	aggr_unicst_addr_t	*addr, **pprev;
2403 	aggr_grp_t		*grp = rx_group->arg_grp;
2404 	aggr_port_t		*port;
2405 	mac_perim_handle_t	mph;
2406 	int			err = 0;
2407 
2408 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
2409 
2410 	if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) {
2411 		mac_perim_exit(mph);
2412 		return (0);
2413 	}
2414 
2415 	/*
2416 	 * Insert this mac address into the list of mac addresses owned by
2417 	 * the aggregation pseudo group.
2418 	 */
2419 	pprev = &rx_group->arg_macaddr;
2420 	while ((addr = *pprev) != NULL) {
2421 		if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) != 0) {
2422 			pprev = &addr->aua_next;
2423 			continue;
2424 		}
2425 		break;
2426 	}
2427 	if (addr == NULL) {
2428 		mac_perim_exit(mph);
2429 		return (EINVAL);
2430 	}
2431 
2432 	for (port = grp->lg_ports; port != NULL; port = port->lp_next)
2433 		aggr_port_remmac(port, mac_addr);
2434 
2435 	*pprev = addr->aua_next;
2436 	kmem_free(addr, sizeof (aggr_unicst_addr_t));
2437 
2438 	mac_perim_exit(mph);
2439 	return (err);
2440 }
2441 
2442 /*
2443  * Add or remove the multicast addresses that are defined for the group
2444  * to or from the specified port.
2445  *
2446  * Note that aggr_grp_multicst_port(..., B_TRUE) is called when the port
2447  * is started and attached, and aggr_grp_multicst_port(..., B_FALSE) is
2448  * called when the port is either stopped or detached.
2449  */
2450 void
2451 aggr_grp_multicst_port(aggr_port_t *port, boolean_t add)
2452 {
2453 	aggr_grp_t *grp = port->lp_grp;
2454 
2455 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
2456 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
2457 
2458 	if (!port->lp_started || port->lp_state != AGGR_PORT_STATE_ATTACHED)
2459 		return;
2460 
2461 	mac_multicast_refresh(grp->lg_mh, aggr_port_multicst, port, add);
2462 }
2463 
2464 static int
2465 aggr_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
2466 {
2467 	aggr_grp_t *grp = arg;
2468 	aggr_port_t *port = NULL, *errport = NULL;
2469 	mac_perim_handle_t mph;
2470 	int err = 0;
2471 
2472 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
2473 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2474 		if (port->lp_state != AGGR_PORT_STATE_ATTACHED ||
2475 		    !port->lp_started) {
2476 			continue;
2477 		}
2478 		err = aggr_port_multicst(port, add, addrp);
2479 		if (err != 0) {
2480 			errport = port;
2481 			break;
2482 		}
2483 	}
2484 
2485 	/*
2486 	 * At least one port caused error return and this error is returned to
2487 	 * mac, eventually a NAK would be sent upwards.
2488 	 * Some ports have this multicast address listed now, and some don't.
2489 	 * Treat this error as a whole aggr failure not individual port failure.
2490 	 * Therefore remove this multicast address from other ports.
2491 	 */
2492 	if ((err != 0) && add) {
2493 		for (port = grp->lg_ports; port != errport;
2494 		    port = port->lp_next) {
2495 			if (port->lp_state != AGGR_PORT_STATE_ATTACHED ||
2496 			    !port->lp_started) {
2497 				continue;
2498 			}
2499 			(void) aggr_port_multicst(port, B_FALSE, addrp);
2500 		}
2501 	}
2502 	mac_perim_exit(mph);
2503 	return (err);
2504 }
2505 
2506 static int
2507 aggr_m_unicst(void *arg, const uint8_t *macaddr)
2508 {
2509 	aggr_grp_t *grp = arg;
2510 	mac_perim_handle_t mph;
2511 	int err;
2512 
2513 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
2514 	err = aggr_grp_modify_common(grp, AGGR_MODIFY_MAC, 0, B_TRUE, macaddr,
2515 	    0, 0);
2516 	mac_perim_exit(mph);
2517 	return (err);
2518 }
2519 
2520 /*
2521  * Initialize the capabilities that are advertised for the group
2522  * according to the capabilities of the constituent ports.
2523  */
2524 static void
2525 aggr_grp_capab_set(aggr_grp_t *grp)
2526 {
2527 	uint32_t cksum;
2528 	aggr_port_t *port;
2529 	mac_capab_lso_t cap_lso;
2530 
2531 	ASSERT(grp->lg_mh == NULL);
2532 	ASSERT(grp->lg_ports != NULL);
2533 
2534 	grp->lg_hcksum_txflags = (uint32_t)-1;
2535 	grp->lg_zcopy = B_TRUE;
2536 	grp->lg_vlan = B_TRUE;
2537 
2538 	grp->lg_lso = B_TRUE;
2539 	grp->lg_cap_lso.lso_flags = (t_uscalar_t)-1;
2540 	grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max = (t_uscalar_t)-1;
2541 
2542 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2543 		if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &cksum))
2544 			cksum = 0;
2545 		grp->lg_hcksum_txflags &= cksum;
2546 
2547 		grp->lg_vlan &=
2548 		    !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL);
2549 
2550 		grp->lg_zcopy &=
2551 		    !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL);
2552 
2553 		grp->lg_lso &=
2554 		    mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso);
2555 		if (grp->lg_lso) {
2556 			grp->lg_cap_lso.lso_flags &= cap_lso.lso_flags;
2557 			if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max >
2558 			    cap_lso.lso_basic_tcp_ipv4.lso_max)
2559 				grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max =
2560 				    cap_lso.lso_basic_tcp_ipv4.lso_max;
2561 		}
2562 	}
2563 }
2564 
2565 /*
2566  * Checks whether the capabilities of the port being added are compatible
2567  * with the current capabilities of the aggregation.
2568  */
2569 static boolean_t
2570 aggr_grp_capab_check(aggr_grp_t *grp, aggr_port_t *port)
2571 {
2572 	uint32_t hcksum_txflags;
2573 
2574 	ASSERT(grp->lg_ports != NULL);
2575 
2576 	if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL)) &
2577 	    grp->lg_vlan) != grp->lg_vlan) {
2578 		return (B_FALSE);
2579 	}
2580 
2581 	if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL)) &
2582 	    grp->lg_zcopy) != grp->lg_zcopy) {
2583 		return (B_FALSE);
2584 	}
2585 
2586 	if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &hcksum_txflags)) {
2587 		if (grp->lg_hcksum_txflags != 0)
2588 			return (B_FALSE);
2589 	} else if ((hcksum_txflags & grp->lg_hcksum_txflags) !=
2590 	    grp->lg_hcksum_txflags) {
2591 		return (B_FALSE);
2592 	}
2593 
2594 	if (grp->lg_lso) {
2595 		mac_capab_lso_t cap_lso;
2596 
2597 		if (mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso)) {
2598 			if ((grp->lg_cap_lso.lso_flags & cap_lso.lso_flags) !=
2599 			    grp->lg_cap_lso.lso_flags)
2600 				return (B_FALSE);
2601 			if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max >
2602 			    cap_lso.lso_basic_tcp_ipv4.lso_max)
2603 				return (B_FALSE);
2604 		} else {
2605 			return (B_FALSE);
2606 		}
2607 	}
2608 
2609 	return (B_TRUE);
2610 }
2611 
2612 /*
2613  * Returns the maximum SDU according to the SDU of the constituent ports.
2614  */
2615 static uint_t
2616 aggr_grp_max_sdu(aggr_grp_t *grp)
2617 {
2618 	uint_t max_sdu = (uint_t)-1;
2619 	aggr_port_t *port;
2620 
2621 	ASSERT(grp->lg_ports != NULL);
2622 
2623 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2624 		uint_t port_sdu_max;
2625 
2626 		mac_sdu_get(port->lp_mh, NULL, &port_sdu_max);
2627 		if (max_sdu > port_sdu_max)
2628 			max_sdu = port_sdu_max;
2629 	}
2630 
2631 	return (max_sdu);
2632 }
2633 
2634 /*
2635  * Checks if the maximum SDU of the specified port is compatible
2636  * with the maximum SDU of the specified aggregation group, returns
2637  * B_TRUE if it is, B_FALSE otherwise.
2638  */
2639 static boolean_t
2640 aggr_grp_sdu_check(aggr_grp_t *grp, aggr_port_t *port)
2641 {
2642 	uint_t port_sdu_max;
2643 
2644 	mac_sdu_get(port->lp_mh, NULL, &port_sdu_max);
2645 	return (port_sdu_max >= grp->lg_max_sdu);
2646 }
2647 
2648 /*
2649  * Returns the maximum margin according to the margin of the constituent ports.
2650  */
2651 static uint32_t
2652 aggr_grp_max_margin(aggr_grp_t *grp)
2653 {
2654 	uint32_t margin = UINT32_MAX;
2655 	aggr_port_t *port;
2656 
2657 	ASSERT(grp->lg_mh == NULL);
2658 	ASSERT(grp->lg_ports != NULL);
2659 
2660 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2661 		if (margin > port->lp_margin)
2662 			margin = port->lp_margin;
2663 	}
2664 
2665 	grp->lg_margin = margin;
2666 	return (margin);
2667 }
2668 
2669 /*
2670  * Checks if the maximum margin of the specified port is compatible
2671  * with the maximum margin of the specified aggregation group, returns
2672  * B_TRUE if it is, B_FALSE otherwise.
2673  */
2674 static boolean_t
2675 aggr_grp_margin_check(aggr_grp_t *grp, aggr_port_t *port)
2676 {
2677 	if (port->lp_margin >= grp->lg_margin)
2678 		return (B_TRUE);
2679 
2680 	/*
2681 	 * See whether the current margin value is allowed to be changed to
2682 	 * the new value.
2683 	 */
2684 	if (!mac_margin_update(grp->lg_mh, port->lp_margin))
2685 		return (B_FALSE);
2686 
2687 	grp->lg_margin = port->lp_margin;
2688 	return (B_TRUE);
2689 }
2690 
2691 /*
2692  * Set MTU on individual ports of an aggregation group
2693  */
2694 static int
2695 aggr_set_port_sdu(aggr_grp_t *grp, aggr_port_t *port, uint32_t sdu,
2696     uint32_t *old_mtu)
2697 {
2698 	boolean_t		removed = B_FALSE;
2699 	mac_perim_handle_t	mph;
2700 	mac_diag_t		diag;
2701 	int			err, rv, retry = 0;
2702 
2703 	if (port->lp_mah != NULL) {
2704 		(void) mac_unicast_remove(port->lp_mch, port->lp_mah);
2705 		port->lp_mah = NULL;
2706 		removed = B_TRUE;
2707 	}
2708 	err = mac_set_mtu(port->lp_mh, sdu, old_mtu);
2709 try_again:
2710 	if (removed && (rv = mac_unicast_add(port->lp_mch, NULL,
2711 	    MAC_UNICAST_PRIMARY | MAC_UNICAST_DISABLE_TX_VID_CHECK,
2712 	    &port->lp_mah, 0, &diag)) != 0) {
2713 		/*
2714 		 * following is a workaround for a bug in 'bge' driver.
2715 		 * See CR 6794654 for more information and this work around
2716 		 * will be removed once the CR is fixed.
2717 		 */
2718 		if (rv == EIO && retry++ < 3) {
2719 			delay(2 * hz);
2720 			goto try_again;
2721 		}
2722 		/*
2723 		 * if mac_unicast_add() failed while setting the MTU,
2724 		 * detach the port from the group.
2725 		 */
2726 		mac_perim_enter_by_mh(port->lp_mh, &mph);
2727 		(void) aggr_grp_detach_port(grp, port);
2728 		mac_perim_exit(mph);
2729 		cmn_err(CE_WARN, "Unable to restart the port %s while "
2730 		    "setting MTU. Detaching the port from the aggregation.",
2731 		    mac_client_name(port->lp_mch));
2732 	}
2733 	return (err);
2734 }
2735 
2736 static int
2737 aggr_sdu_update(aggr_grp_t *grp, uint32_t sdu)
2738 {
2739 	int			err = 0, i, rv;
2740 	aggr_port_t		*port;
2741 	uint32_t		*mtu;
2742 
2743 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
2744 
2745 	/*
2746 	 * If the MTU being set is equal to aggr group's maximum
2747 	 * allowable value, then there is nothing to change
2748 	 */
2749 	if (sdu == grp->lg_max_sdu)
2750 		return (0);
2751 
2752 	/* 0 is aggr group's min sdu */
2753 	if (sdu == 0)
2754 		return (EINVAL);
2755 
2756 	mtu = kmem_alloc(sizeof (uint32_t) * grp->lg_nports, KM_SLEEP);
2757 	for (port = grp->lg_ports, i = 0; port != NULL && err == 0;
2758 	    port = port->lp_next, i++) {
2759 		err = aggr_set_port_sdu(grp, port, sdu, mtu + i);
2760 	}
2761 	if (err != 0) {
2762 		/* recover from error: reset the mtus of the ports */
2763 		aggr_port_t *tmp;
2764 
2765 		for (tmp = grp->lg_ports, i = 0; tmp != port;
2766 		    tmp = tmp->lp_next, i++) {
2767 			(void) aggr_set_port_sdu(grp, tmp, *(mtu + i), NULL);
2768 		}
2769 		goto bail;
2770 	}
2771 	grp->lg_max_sdu = aggr_grp_max_sdu(grp);
2772 	rv = mac_maxsdu_update(grp->lg_mh, grp->lg_max_sdu);
2773 	ASSERT(rv == 0);
2774 bail:
2775 	kmem_free(mtu, sizeof (uint32_t) * grp->lg_nports);
2776 	return (err);
2777 }
2778 
2779 /*
2780  * Callback functions for set/get of properties
2781  */
2782 /*ARGSUSED*/
2783 static int
2784 aggr_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
2785     uint_t pr_valsize, const void *pr_val)
2786 {
2787 	int		err = ENOTSUP;
2788 	aggr_grp_t	*grp = m_driver;
2789 
2790 	switch (pr_num) {
2791 	case MAC_PROP_MTU: {
2792 		uint32_t	mtu;
2793 
2794 		if (pr_valsize < sizeof (mtu)) {
2795 			err = EINVAL;
2796 			break;
2797 		}
2798 		bcopy(pr_val, &mtu, sizeof (mtu));
2799 		err = aggr_sdu_update(grp, mtu);
2800 		break;
2801 	}
2802 	default:
2803 		break;
2804 	}
2805 	return (err);
2806 }
2807 
2808 typedef struct rboundary {
2809 	uint32_t	bval;
2810 	int		btype;
2811 } rboundary_t;
2812 
2813 /*
2814  * This function finds the intersection of mtu ranges stored in arrays -
2815  * mrange[0] ... mrange[mcount -1]. It returns the intersection in rval.
2816  * Individual arrays are assumed to contain non-overlapping ranges.
2817  * Algorithm:
2818  *   A range has two boundaries - min and max. We scan all arrays and store
2819  * each boundary as a separate element in a temporary array. We also store
2820  * the boundary types, min or max, as +1 or -1 respectively in the temporary
2821  * array. Then we sort the temporary array in ascending order. We scan the
2822  * sorted array from lower to higher values and keep a cumulative sum of
2823  * boundary types. Element in the temporary array for which the sum reaches
2824  * mcount is a min boundary of a range in the result and next element will be
2825  * max boundary.
2826  *
2827  * Example for mcount = 3,
2828  *
2829  *  ----|_________|-------|_______|----|__|------ mrange[0]
2830  *
2831  *  -------|________|--|____________|-----|___|-- mrange[1]
2832  *
2833  *  --------|________________|-------|____|------ mrange[2]
2834  *
2835  *                                      3 2 1
2836  *                                       \|/
2837  *      1  23     2 1  2  3  2    1 01 2  V   0  <- the sum
2838  *  ----|--||-----|-|--|--|--|----|-||-|--|---|-- sorted array
2839  *
2840  *                                 same min and max
2841  *                                        V
2842  *  --------|_____|-------|__|------------|------ intersecting ranges
2843  */
2844 void
2845 aggr_mtu_range_intersection(mac_propval_range_t **mrange, int mcount,
2846     mac_propval_uint32_range_t **prval, int *prmaxcnt, int *prcount)
2847 {
2848 	mac_propval_uint32_range_t	*rval, *ur;
2849 	int				rmaxcnt, rcount;
2850 	size_t				sz_range32;
2851 	rboundary_t			*ta; /* temporary array */
2852 	rboundary_t			temp;
2853 	boolean_t			range_started = B_FALSE;
2854 	int				i, j, m, sum;
2855 
2856 	sz_range32 = sizeof (mac_propval_uint32_range_t);
2857 
2858 	for (i = 0, rmaxcnt = 0; i < mcount; i++)
2859 		rmaxcnt += mrange[i]->mpr_count;
2860 
2861 	/* Allocate enough space to store the results */
2862 	rval = kmem_alloc(rmaxcnt * sz_range32, KM_SLEEP);
2863 
2864 	/* Number of boundaries are twice as many as ranges */
2865 	ta = kmem_alloc(2 * rmaxcnt * sizeof (rboundary_t), KM_SLEEP);
2866 
2867 	for (i = 0, m = 0; i < mcount; i++) {
2868 		ur = &(mrange[i]->mpr_range_uint32[0]);
2869 		for (j = 0; j < mrange[i]->mpr_count; j++) {
2870 			ta[m].bval = ur[j].mpur_min;
2871 			ta[m++].btype = 1;
2872 			ta[m].bval = ur[j].mpur_max;
2873 			ta[m++].btype = -1;
2874 		}
2875 	}
2876 
2877 	/*
2878 	 * Sort the temporary array in ascending order of bval;
2879 	 * if boundary values are same then sort on btype.
2880 	 */
2881 	for (i = 0; i < m-1; i++) {
2882 		for (j = i+1; j < m; j++) {
2883 			if ((ta[i].bval > ta[j].bval) ||
2884 			    ((ta[i].bval == ta[j].bval) &&
2885 			    (ta[i].btype < ta[j].btype))) {
2886 				temp = ta[i];
2887 				ta[i] = ta[j];
2888 				ta[j] = temp;
2889 			}
2890 		}
2891 	}
2892 
2893 	/* Walk through temporary array to find all ranges in the results */
2894 	for (i = 0, sum = 0, rcount = 0; i < m; i++) {
2895 		sum += ta[i].btype;
2896 		if (sum == mcount) {
2897 			rval[rcount].mpur_min = ta[i].bval;
2898 			range_started = B_TRUE;
2899 		} else if (sum < mcount && range_started) {
2900 			rval[rcount++].mpur_max = ta[i].bval;
2901 			range_started = B_FALSE;
2902 		}
2903 	}
2904 
2905 	*prval = rval;
2906 	*prmaxcnt = rmaxcnt;
2907 	*prcount = rcount;
2908 
2909 	kmem_free(ta, 2 * rmaxcnt * sizeof (rboundary_t));
2910 }
2911 
2912 /*
2913  * Returns the mtu ranges which could be supported by aggr group.
2914  * prmaxcnt returns the size of the buffer prval, prcount returns
2915  * the number of valid entries in prval. Caller is responsible
2916  * for freeing up prval.
2917  */
2918 int
2919 aggr_grp_possible_mtu_range(aggr_grp_t *grp, mac_propval_uint32_range_t **prval,
2920     int *prmaxcnt, int *prcount)
2921 {
2922 	mac_propval_range_t		**vals;
2923 	aggr_port_t			*port;
2924 	mac_perim_handle_t		mph;
2925 	uint_t				i, numr;
2926 	int				err = 0;
2927 	size_t				sz_propval, sz_range32;
2928 	size_t				size;
2929 
2930 	sz_propval = sizeof (mac_propval_range_t);
2931 	sz_range32 = sizeof (mac_propval_uint32_range_t);
2932 
2933 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
2934 
2935 	vals = kmem_zalloc(sizeof (mac_propval_range_t *) * grp->lg_nports,
2936 	    KM_SLEEP);
2937 
2938 	for (port = grp->lg_ports, i = 0; port != NULL;
2939 	    port = port->lp_next, i++) {
2940 
2941 		size = sz_propval;
2942 		vals[i] = kmem_alloc(size, KM_SLEEP);
2943 		vals[i]->mpr_count = 1;
2944 
2945 		mac_perim_enter_by_mh(port->lp_mh, &mph);
2946 
2947 		err = mac_prop_info(port->lp_mh, MAC_PROP_MTU, NULL,
2948 		    NULL, 0, vals[i], NULL);
2949 		if (err == ENOSPC) {
2950 			/*
2951 			 * Not enough space to hold all ranges.
2952 			 * Allocate extra space as indicated and retry.
2953 			 */
2954 			numr = vals[i]->mpr_count;
2955 			kmem_free(vals[i], sz_propval);
2956 			size = sz_propval + (numr - 1) * sz_range32;
2957 			vals[i] = kmem_alloc(size, KM_SLEEP);
2958 			vals[i]->mpr_count = numr;
2959 			err = mac_prop_info(port->lp_mh, MAC_PROP_MTU, NULL,
2960 			    NULL, 0, vals[i], NULL);
2961 			ASSERT(err != ENOSPC);
2962 		}
2963 		mac_perim_exit(mph);
2964 		if (err != 0) {
2965 			kmem_free(vals[i], size);
2966 			vals[i] = NULL;
2967 			break;
2968 		}
2969 	}
2970 
2971 	/*
2972 	 * if any of the underlying ports does not support changing MTU then
2973 	 * just return ENOTSUP
2974 	 */
2975 	if (port != NULL) {
2976 		ASSERT(err != 0);
2977 		goto done;
2978 	}
2979 
2980 	aggr_mtu_range_intersection(vals, grp->lg_nports, prval, prmaxcnt,
2981 	    prcount);
2982 
2983 done:
2984 	for (i = 0; i < grp->lg_nports; i++) {
2985 		if (vals[i] != NULL) {
2986 			numr = vals[i]->mpr_count;
2987 			size = sz_propval + (numr - 1) * sz_range32;
2988 			kmem_free(vals[i], size);
2989 		}
2990 	}
2991 
2992 	kmem_free(vals, sizeof (mac_propval_range_t *) * grp->lg_nports);
2993 	return (err);
2994 }
2995 
2996 static void
2997 aggr_m_propinfo(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
2998     mac_prop_info_handle_t prh)
2999 {
3000 	aggr_grp_t			*grp = m_driver;
3001 	mac_propval_uint32_range_t	*rval = NULL;
3002 	int				i, rcount, rmaxcnt;
3003 	int				err = 0;
3004 
3005 	_NOTE(ARGUNUSED(pr_name));
3006 
3007 	switch (pr_num) {
3008 	case MAC_PROP_MTU:
3009 
3010 		err = aggr_grp_possible_mtu_range(grp, &rval, &rmaxcnt,
3011 		    &rcount);
3012 		if (err != 0) {
3013 			ASSERT(rval == NULL);
3014 			return;
3015 		}
3016 		for (i = 0; i < rcount; i++) {
3017 			mac_prop_info_set_range_uint32(prh,
3018 			    rval[i].mpur_min, rval[i].mpur_max);
3019 		}
3020 		kmem_free(rval, sizeof (mac_propval_uint32_range_t) * rmaxcnt);
3021 		break;
3022 	}
3023 }
3024