xref: /titanic_44/usr/src/uts/common/io/aggr/aggr_grp.c (revision c1591d2226910ad10594ffb2fa2f1db887f35afb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups.
28  *
29  * An instance of the structure aggr_grp_t is allocated for each
30  * link aggregation group. When created, aggr_grp_t objects are
31  * entered into the aggr_grp_hash hash table maintained by the modhash
32  * module. The hash key is the linkid associated with the link
33  * aggregation group.
34  *
35  * A set of MAC ports is associated with each aggregation group.
36  */
37 
38 #include <sys/types.h>
39 #include <sys/sysmacros.h>
40 #include <sys/conf.h>
41 #include <sys/cmn_err.h>
42 #include <sys/disp.h>
43 #include <sys/list.h>
44 #include <sys/ksynch.h>
45 #include <sys/kmem.h>
46 #include <sys/stream.h>
47 #include <sys/modctl.h>
48 #include <sys/ddi.h>
49 #include <sys/sunddi.h>
50 #include <sys/atomic.h>
51 #include <sys/stat.h>
52 #include <sys/modhash.h>
53 #include <sys/id_space.h>
54 #include <sys/strsun.h>
55 #include <sys/dlpi.h>
56 #include <sys/mac_provider.h>
57 #include <sys/dls.h>
58 #include <sys/vlan.h>
59 #include <sys/aggr.h>
60 #include <sys/aggr_impl.h>
61 
62 static int aggr_m_start(void *);
63 static void aggr_m_stop(void *);
64 static int aggr_m_promisc(void *, boolean_t);
65 static int aggr_m_multicst(void *, boolean_t, const uint8_t *);
66 static int aggr_m_unicst(void *, const uint8_t *);
67 static int aggr_m_stat(void *, uint_t, uint64_t *);
68 static void aggr_m_ioctl(void *, queue_t *, mblk_t *);
69 static boolean_t aggr_m_capab_get(void *, mac_capab_t, void *);
70 static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, datalink_id_t);
71 static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *,
72     boolean_t *);
73 
74 static void aggr_grp_capab_set(aggr_grp_t *);
75 static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *);
76 static uint_t aggr_grp_max_sdu(aggr_grp_t *);
77 static uint32_t aggr_grp_max_margin(aggr_grp_t *);
78 static boolean_t aggr_grp_sdu_check(aggr_grp_t *, aggr_port_t *);
79 static boolean_t aggr_grp_margin_check(aggr_grp_t *, aggr_port_t *);
80 
81 static int aggr_add_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
82 static void aggr_rem_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
83 static int aggr_pseudo_disable_intr(mac_intr_handle_t);
84 static int aggr_pseudo_enable_intr(mac_intr_handle_t);
85 static int aggr_pseudo_start_ring(mac_ring_driver_t, uint64_t);
86 static void aggr_pseudo_stop_ring(mac_ring_driver_t);
87 static int aggr_addmac(void *, const uint8_t *);
88 static int aggr_remmac(void *, const uint8_t *);
89 static mblk_t *aggr_rx_poll(void *, int);
90 static void aggr_fill_ring(void *, mac_ring_type_t, const int,
91     const int, mac_ring_info_t *, mac_ring_handle_t);
92 static void aggr_fill_group(void *, mac_ring_type_t, const int,
93     mac_group_info_t *, mac_group_handle_t);
94 
95 static kmem_cache_t	*aggr_grp_cache;
96 static mod_hash_t	*aggr_grp_hash;
97 static krwlock_t	aggr_grp_lock;
98 static uint_t		aggr_grp_cnt;
99 static id_space_t	*key_ids;
100 
101 #define	GRP_HASHSZ		64
102 #define	GRP_HASH_KEY(linkid)	((mod_hash_key_t)(uintptr_t)linkid)
103 #define	AGGR_PORT_NAME_DELIMIT '-'
104 
105 static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0};
106 
107 #define	AGGR_M_CALLBACK_FLAGS	(MC_IOCTL | MC_GETCAPAB)
108 
109 static mac_callbacks_t aggr_m_callbacks = {
110 	AGGR_M_CALLBACK_FLAGS,
111 	aggr_m_stat,
112 	aggr_m_start,
113 	aggr_m_stop,
114 	aggr_m_promisc,
115 	aggr_m_multicst,
116 	NULL,
117 	aggr_m_tx,
118 	aggr_m_ioctl,
119 	aggr_m_capab_get
120 };
121 
122 /*ARGSUSED*/
123 static int
124 aggr_grp_constructor(void *buf, void *arg, int kmflag)
125 {
126 	aggr_grp_t *grp = buf;
127 
128 	bzero(grp, sizeof (*grp));
129 	mutex_init(&grp->lg_lacp_lock, NULL, MUTEX_DEFAULT, NULL);
130 	cv_init(&grp->lg_lacp_cv, NULL, CV_DEFAULT, NULL);
131 	rw_init(&grp->lg_tx_lock, NULL, RW_DRIVER, NULL);
132 	mutex_init(&grp->lg_port_lock, NULL, MUTEX_DEFAULT, NULL);
133 	cv_init(&grp->lg_port_cv, NULL, CV_DEFAULT, NULL);
134 	grp->lg_link_state = LINK_STATE_UNKNOWN;
135 	return (0);
136 }
137 
138 /*ARGSUSED*/
139 static void
140 aggr_grp_destructor(void *buf, void *arg)
141 {
142 	aggr_grp_t *grp = buf;
143 
144 	if (grp->lg_tx_ports != NULL) {
145 		kmem_free(grp->lg_tx_ports,
146 		    grp->lg_tx_ports_size * sizeof (aggr_port_t *));
147 	}
148 
149 	mutex_destroy(&grp->lg_lacp_lock);
150 	cv_destroy(&grp->lg_lacp_cv);
151 	mutex_destroy(&grp->lg_port_lock);
152 	cv_destroy(&grp->lg_port_cv);
153 	rw_destroy(&grp->lg_tx_lock);
154 }
155 
156 void
157 aggr_grp_init(void)
158 {
159 	aggr_grp_cache = kmem_cache_create("aggr_grp_cache",
160 	    sizeof (aggr_grp_t), 0, aggr_grp_constructor,
161 	    aggr_grp_destructor, NULL, NULL, NULL, 0);
162 
163 	aggr_grp_hash = mod_hash_create_idhash("aggr_grp_hash",
164 	    GRP_HASHSZ, mod_hash_null_valdtor);
165 	rw_init(&aggr_grp_lock, NULL, RW_DEFAULT, NULL);
166 	aggr_grp_cnt = 0;
167 
168 	/*
169 	 * Allocate an id space to manage key values (when key is not
170 	 * specified). The range of the id space will be from
171 	 * (AGGR_MAX_KEY + 1) to UINT16_MAX, because the LACP protocol
172 	 * uses a 16-bit key.
173 	 */
174 	key_ids = id_space_create("aggr_key_ids", AGGR_MAX_KEY + 1, UINT16_MAX);
175 	ASSERT(key_ids != NULL);
176 }
177 
178 void
179 aggr_grp_fini(void)
180 {
181 	id_space_destroy(key_ids);
182 	rw_destroy(&aggr_grp_lock);
183 	mod_hash_destroy_idhash(aggr_grp_hash);
184 	kmem_cache_destroy(aggr_grp_cache);
185 }
186 
187 uint_t
188 aggr_grp_count(void)
189 {
190 	uint_t	count;
191 
192 	rw_enter(&aggr_grp_lock, RW_READER);
193 	count = aggr_grp_cnt;
194 	rw_exit(&aggr_grp_lock);
195 	return (count);
196 }
197 
198 /*
199  * Since both aggr_port_notify_cb() and aggr_port_timer_thread() functions
200  * requires the mac perimeter, this function holds a reference of the aggr
201  * and aggr won't call mac_unregister() until this reference drops to 0.
202  */
203 void
204 aggr_grp_port_hold(aggr_port_t *port)
205 {
206 	aggr_grp_t	*grp = port->lp_grp;
207 
208 	AGGR_PORT_REFHOLD(port);
209 	mutex_enter(&grp->lg_port_lock);
210 	grp->lg_port_ref++;
211 	mutex_exit(&grp->lg_port_lock);
212 }
213 
214 /*
215  * Release the reference of the grp and inform aggr_grp_delete() calling
216  * mac_unregister() is now safe.
217  */
218 void
219 aggr_grp_port_rele(aggr_port_t *port)
220 {
221 	aggr_grp_t	*grp = port->lp_grp;
222 
223 	mutex_enter(&grp->lg_port_lock);
224 	if (--grp->lg_port_ref == 0)
225 		cv_signal(&grp->lg_port_cv);
226 	mutex_exit(&grp->lg_port_lock);
227 	AGGR_PORT_REFRELE(port);
228 }
229 
230 /*
231  * Wait for the port's lacp timer thread and the port's notification callback
232  * to exit.
233  */
234 void
235 aggr_grp_port_wait(aggr_grp_t *grp)
236 {
237 	mutex_enter(&grp->lg_port_lock);
238 	if (grp->lg_port_ref != 0)
239 		cv_wait(&grp->lg_port_cv, &grp->lg_port_lock);
240 	mutex_exit(&grp->lg_port_lock);
241 }
242 
243 /*
244  * Attach a port to a link aggregation group.
245  *
246  * A port is attached to a link aggregation group once its speed
247  * and link state have been verified.
248  *
249  * Returns B_TRUE if the group link state or speed has changed. If
250  * it's the case, the caller must notify the MAC layer via a call
251  * to mac_link().
252  */
253 boolean_t
254 aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port)
255 {
256 	boolean_t link_state_changed = B_FALSE;
257 
258 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
259 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
260 
261 	if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
262 		return (B_FALSE);
263 
264 	/*
265 	 * Validate the MAC port link speed and update the group
266 	 * link speed if needed.
267 	 */
268 	if (port->lp_ifspeed == 0 ||
269 	    port->lp_link_state != LINK_STATE_UP ||
270 	    port->lp_link_duplex != LINK_DUPLEX_FULL) {
271 		/*
272 		 * Can't attach a MAC port with unknown link speed,
273 		 * down link, or not in full duplex mode.
274 		 */
275 		return (B_FALSE);
276 	}
277 
278 	if (grp->lg_ifspeed == 0) {
279 		/*
280 		 * The group inherits the speed of the first link being
281 		 * attached.
282 		 */
283 		grp->lg_ifspeed = port->lp_ifspeed;
284 		link_state_changed = B_TRUE;
285 	} else if (grp->lg_ifspeed != port->lp_ifspeed) {
286 		/*
287 		 * The link speed of the MAC port must be the same as
288 		 * the group link speed, as per 802.3ad. Since it is
289 		 * not, the attach is cancelled.
290 		 */
291 		return (B_FALSE);
292 	}
293 
294 	grp->lg_nattached_ports++;
295 
296 	/*
297 	 * Update the group link state.
298 	 */
299 	if (grp->lg_link_state != LINK_STATE_UP) {
300 		grp->lg_link_state = LINK_STATE_UP;
301 		grp->lg_link_duplex = LINK_DUPLEX_FULL;
302 		link_state_changed = B_TRUE;
303 	}
304 
305 	aggr_grp_multicst_port(port, B_TRUE);
306 
307 	/*
308 	 * Update port's state.
309 	 */
310 	port->lp_state = AGGR_PORT_STATE_ATTACHED;
311 
312 	/*
313 	 * Set port's receive callback
314 	 */
315 	mac_rx_set(port->lp_mch, aggr_recv_cb, port);
316 
317 	/*
318 	 * If LACP is OFF, the port can be used to send data as soon
319 	 * as its link is up and verified to be compatible with the
320 	 * aggregation.
321 	 *
322 	 * If LACP is active or passive, notify the LACP subsystem, which
323 	 * will enable sending on the port following the LACP protocol.
324 	 */
325 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
326 		aggr_send_port_enable(port);
327 	else
328 		aggr_lacp_port_attached(port);
329 
330 	return (link_state_changed);
331 }
332 
333 boolean_t
334 aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port)
335 {
336 	boolean_t link_state_changed = B_FALSE;
337 
338 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
339 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
340 
341 	/* update state */
342 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
343 		return (B_FALSE);
344 
345 	mac_rx_clear(port->lp_mch);
346 
347 	aggr_grp_multicst_port(port, B_FALSE);
348 
349 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
350 		aggr_send_port_disable(port);
351 	else
352 		aggr_lacp_port_detached(port);
353 
354 	port->lp_state = AGGR_PORT_STATE_STANDBY;
355 
356 	grp->lg_nattached_ports--;
357 	if (grp->lg_nattached_ports == 0) {
358 		/* the last attached MAC port of the group is being detached */
359 		grp->lg_ifspeed = 0;
360 		grp->lg_link_state = LINK_STATE_DOWN;
361 		grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
362 		link_state_changed = B_TRUE;
363 	}
364 
365 	return (link_state_changed);
366 }
367 
368 /*
369  * Update the MAC addresses of the constituent ports of the specified
370  * group. This function is invoked:
371  * - after creating a new aggregation group.
372  * - after adding new ports to an aggregation group.
373  * - after removing a port from a group when the MAC address of
374  *   that port was used for the MAC address of the group.
375  * - after the MAC address of a port changed when the MAC address
376  *   of that port was used for the MAC address of the group.
377  *
378  * Return true if the link state of the aggregation changed, for example
379  * as a result of a failure changing the MAC address of one of the
380  * constituent ports.
381  */
382 boolean_t
383 aggr_grp_update_ports_mac(aggr_grp_t *grp)
384 {
385 	aggr_port_t *cport;
386 	boolean_t link_state_changed = B_FALSE;
387 	mac_perim_handle_t mph;
388 
389 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
390 
391 	for (cport = grp->lg_ports; cport != NULL;
392 	    cport = cport->lp_next) {
393 		mac_perim_enter_by_mh(cport->lp_mh, &mph);
394 		if (aggr_port_unicst(cport) != 0) {
395 			if (aggr_grp_detach_port(grp, cport))
396 				link_state_changed = B_TRUE;
397 		} else {
398 			/*
399 			 * If a port was detached because of a previous
400 			 * failure changing the MAC address, the port is
401 			 * reattached when it successfully changes the MAC
402 			 * address now, and this might cause the link state
403 			 * of the aggregation to change.
404 			 */
405 			if (aggr_grp_attach_port(grp, cport))
406 				link_state_changed = B_TRUE;
407 		}
408 		mac_perim_exit(mph);
409 	}
410 	return (link_state_changed);
411 }
412 
413 /*
414  * Invoked when the MAC address of a port has changed. If the port's
415  * MAC address was used for the group MAC address, set mac_addr_changedp
416  * to B_TRUE to indicate to the caller that it should send a MAC_NOTE_UNICST
417  * notification. If the link state changes due to detach/attach of
418  * the constituent port, set link_state_changedp to B_TRUE to indicate
419  * to the caller that it should send a MAC_NOTE_LINK notification. In both
420  * cases, it is the responsibility of the caller to invoke notification
421  * functions after releasing the the port lock.
422  */
423 void
424 aggr_grp_port_mac_changed(aggr_grp_t *grp, aggr_port_t *port,
425     boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
426 {
427 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
428 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
429 	ASSERT(mac_addr_changedp != NULL);
430 	ASSERT(link_state_changedp != NULL);
431 
432 	*mac_addr_changedp = B_FALSE;
433 	*link_state_changedp = B_FALSE;
434 
435 	if (grp->lg_addr_fixed) {
436 		/*
437 		 * The group is using a fixed MAC address or an automatic
438 		 * MAC address has not been set.
439 		 */
440 		return;
441 	}
442 
443 	if (grp->lg_mac_addr_port == port) {
444 		/*
445 		 * The MAC address of the port was assigned to the group
446 		 * MAC address. Update the group MAC address.
447 		 */
448 		bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
449 		*mac_addr_changedp = B_TRUE;
450 	} else {
451 		/*
452 		 * Update the actual port MAC address to the MAC address
453 		 * of the group.
454 		 */
455 		if (aggr_port_unicst(port) != 0) {
456 			*link_state_changedp = aggr_grp_detach_port(grp, port);
457 		} else {
458 			/*
459 			 * If a port was detached because of a previous
460 			 * failure changing the MAC address, the port is
461 			 * reattached when it successfully changes the MAC
462 			 * address now, and this might cause the link state
463 			 * of the aggregation to change.
464 			 */
465 			*link_state_changedp = aggr_grp_attach_port(grp, port);
466 		}
467 	}
468 }
469 
470 /*
471  * Add a port to a link aggregation group.
472  */
473 static int
474 aggr_grp_add_port(aggr_grp_t *grp, datalink_id_t port_linkid, boolean_t force,
475     aggr_port_t **pp)
476 {
477 	aggr_port_t *port, **cport;
478 	mac_perim_handle_t mph;
479 	int err;
480 
481 	/*
482 	 * lg_mh could be NULL when the function is called during the creation
483 	 * of the aggregation.
484 	 */
485 	ASSERT(grp->lg_mh == NULL || MAC_PERIM_HELD(grp->lg_mh));
486 
487 	/* create new port */
488 	err = aggr_port_create(grp, port_linkid, force, &port);
489 	if (err != 0)
490 		return (err);
491 
492 	mac_perim_enter_by_mh(port->lp_mh, &mph);
493 
494 	/* add port to list of group constituent ports */
495 	cport = &grp->lg_ports;
496 	while (*cport != NULL)
497 		cport = &((*cport)->lp_next);
498 	*cport = port;
499 
500 	/*
501 	 * Back reference to the group it is member of. A port always
502 	 * holds a reference to its group to ensure that the back
503 	 * reference is always valid.
504 	 */
505 	port->lp_grp = grp;
506 	AGGR_GRP_REFHOLD(grp);
507 	grp->lg_nports++;
508 
509 	aggr_lacp_init_port(port);
510 	mac_perim_exit(mph);
511 
512 	if (pp != NULL)
513 		*pp = port;
514 
515 	return (0);
516 }
517 
518 /*
519  * Add a pseudo Rx ring for the given HW ring handle.
520  */
521 static int
522 aggr_add_pseudo_rx_ring(aggr_port_t *port,
523     aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh)
524 {
525 	aggr_pseudo_rx_ring_t	*ring;
526 	int			err;
527 	int			j;
528 
529 	for (j = 0; j < MAX_RINGS_PER_GROUP; j++) {
530 		ring = rx_grp->arg_rings + j;
531 		if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE))
532 			break;
533 	}
534 
535 	/*
536 	 * No slot for this new Rx ring.
537 	 */
538 	if (j == MAX_RINGS_PER_GROUP)
539 		return (EIO);
540 
541 	ring->arr_flags |= MAC_PSEUDO_RING_INUSE;
542 	ring->arr_hw_rh = hw_rh;
543 	ring->arr_port = port;
544 	rx_grp->arg_ring_cnt++;
545 
546 	/*
547 	 * The group is already registered, dynamically add a new ring to the
548 	 * mac group.
549 	 */
550 	mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring);
551 	if ((err = mac_group_add_ring(rx_grp->arg_gh, j)) != 0) {
552 		ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE;
553 		ring->arr_hw_rh = NULL;
554 		ring->arr_port = NULL;
555 		rx_grp->arg_ring_cnt--;
556 		mac_hwring_teardown(hw_rh);
557 	}
558 	return (err);
559 }
560 
561 /*
562  * Remove the pseudo Rx ring of the given HW ring handle.
563  */
564 static void
565 aggr_rem_pseudo_rx_ring(aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh)
566 {
567 	aggr_pseudo_rx_ring_t	*ring;
568 	int			j;
569 
570 	for (j = 0; j < MAX_RINGS_PER_GROUP; j++) {
571 		ring = rx_grp->arg_rings + j;
572 		if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE) ||
573 		    ring->arr_hw_rh != hw_rh) {
574 			continue;
575 		}
576 
577 		mac_group_rem_ring(rx_grp->arg_gh, ring->arr_rh);
578 
579 		ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE;
580 		ring->arr_hw_rh = NULL;
581 		ring->arr_port = NULL;
582 		rx_grp->arg_ring_cnt--;
583 		mac_hwring_teardown(hw_rh);
584 		break;
585 	}
586 }
587 
588 /*
589  * This function is called to create pseudo rings over the hardware rings of
590  * the underlying device. Note that there is a 1:1 mapping between the pseudo
591  * RX rings of the aggr and the hardware rings of the underlying port.
592  */
593 static int
594 aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
595 {
596 	aggr_grp_t		*grp = port->lp_grp;
597 	mac_ring_handle_t	hw_rh[MAX_RINGS_PER_GROUP];
598 	aggr_unicst_addr_t	*addr, *a;
599 	mac_perim_handle_t	pmph;
600 	int			hw_rh_cnt, i = 0, j;
601 	int			err = 0;
602 
603 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
604 	mac_perim_enter_by_mh(port->lp_mh, &pmph);
605 
606 	/*
607 	 * This function must be called after the aggr registers its mac
608 	 * and its RX group has been initialized.
609 	 */
610 	ASSERT(rx_grp->arg_gh != NULL);
611 
612 	/*
613 	 * Get the list the the underlying HW rings.
614 	 */
615 	hw_rh_cnt = mac_hwrings_get(port->lp_mch, &port->lp_hwgh, hw_rh);
616 
617 	if (port->lp_hwgh != NULL) {
618 		/*
619 		 * Quiesce the HW ring and the mac srs on the ring. Note
620 		 * that the HW ring will be restarted when the pseudo ring
621 		 * is started. At that time all the packets will be
622 		 * directly passed up to the pseudo RX ring and handled
623 		 * by mac srs created over the pseudo RX ring.
624 		 */
625 		mac_rx_client_quiesce(port->lp_mch);
626 		mac_srs_perm_quiesce(port->lp_mch, B_TRUE);
627 	}
628 
629 	/*
630 	 * Add all the unicast addresses to the newly added port.
631 	 */
632 	for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) {
633 		if ((err = aggr_port_addmac(port, addr->aua_addr)) != 0)
634 			break;
635 	}
636 
637 	for (i = 0; err == 0 && i < hw_rh_cnt; i++)
638 		err = aggr_add_pseudo_rx_ring(port, rx_grp, hw_rh[i]);
639 
640 	if (err != 0) {
641 		for (j = 0; j < i; j++)
642 			aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[j]);
643 
644 		for (a = rx_grp->arg_macaddr; a != addr; a = a->aua_next)
645 			aggr_port_remmac(port, a->aua_addr);
646 
647 		if (port->lp_hwgh != NULL) {
648 			mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
649 			mac_rx_client_restart(port->lp_mch);
650 			port->lp_hwgh = NULL;
651 		}
652 	} else {
653 		port->lp_grp_added = B_TRUE;
654 	}
655 done:
656 	mac_perim_exit(pmph);
657 	return (err);
658 }
659 
660 /*
661  * This function is called by aggr to remove pseudo RX rings over the
662  * HW rings of the underlying port.
663  */
664 static void
665 aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
666 {
667 	aggr_grp_t		*grp = port->lp_grp;
668 	mac_ring_handle_t	hw_rh[MAX_RINGS_PER_GROUP];
669 	aggr_unicst_addr_t	*addr;
670 	mac_group_handle_t	hwgh;
671 	mac_perim_handle_t	pmph;
672 	int			hw_rh_cnt, i;
673 
674 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
675 	mac_perim_enter_by_mh(port->lp_mh, &pmph);
676 
677 	if (!port->lp_grp_added)
678 		goto done;
679 
680 	ASSERT(rx_grp->arg_gh != NULL);
681 	hw_rh_cnt = mac_hwrings_get(port->lp_mch, &hwgh, hw_rh);
682 
683 	/*
684 	 * If hw_rh_cnt is 0, it means that the underlying port does not
685 	 * support RX rings. Directly return in this case.
686 	 */
687 	for (i = 0; i < hw_rh_cnt; i++)
688 		aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[i]);
689 
690 	for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next)
691 		aggr_port_remmac(port, addr->aua_addr);
692 
693 	if (port->lp_hwgh != NULL) {
694 		port->lp_hwgh = NULL;
695 
696 		/*
697 		 * First clear the permanent-quiesced flag of the RX srs then
698 		 * restart the HW ring and the mac srs on the ring. Note that
699 		 * the HW ring and associated SRS will soon been removed when
700 		 * the port is removed from the aggr.
701 		 */
702 		mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
703 		mac_rx_client_restart(port->lp_mch);
704 	}
705 
706 	port->lp_grp_added = B_FALSE;
707 done:
708 	mac_perim_exit(pmph);
709 }
710 
711 static int
712 aggr_pseudo_disable_intr(mac_intr_handle_t ih)
713 {
714 	aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih;
715 	return (mac_hwring_disable_intr(rr_ring->arr_hw_rh));
716 }
717 
718 static int
719 aggr_pseudo_enable_intr(mac_intr_handle_t ih)
720 {
721 	aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih;
722 	return (mac_hwring_enable_intr(rr_ring->arr_hw_rh));
723 }
724 
725 static int
726 aggr_pseudo_start_ring(mac_ring_driver_t arg, uint64_t mr_gen)
727 {
728 	aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)arg;
729 	int err;
730 
731 	err = mac_hwring_start(rr_ring->arr_hw_rh);
732 	if (err == 0)
733 		rr_ring->arr_gen = mr_gen;
734 	return (err);
735 }
736 
737 static void
738 aggr_pseudo_stop_ring(mac_ring_driver_t arg)
739 {
740 	aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)arg;
741 	mac_hwring_stop(rr_ring->arr_hw_rh);
742 }
743 
744 /*
745  * Add one or more ports to an existing link aggregation group.
746  */
747 int
748 aggr_grp_add_ports(datalink_id_t linkid, uint_t nports, boolean_t force,
749     laioc_port_t *ports)
750 {
751 	int rc, i, nadded = 0;
752 	aggr_grp_t *grp = NULL;
753 	aggr_port_t *port;
754 	boolean_t link_state_changed = B_FALSE;
755 	mac_perim_handle_t mph, pmph;
756 
757 	/* get group corresponding to linkid */
758 	rw_enter(&aggr_grp_lock, RW_READER);
759 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
760 	    (mod_hash_val_t *)&grp) != 0) {
761 		rw_exit(&aggr_grp_lock);
762 		return (ENOENT);
763 	}
764 	AGGR_GRP_REFHOLD(grp);
765 
766 	/*
767 	 * Hold the perimeter so that the aggregation won't be destroyed.
768 	 */
769 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
770 	rw_exit(&aggr_grp_lock);
771 
772 	/* add the specified ports to group */
773 	for (i = 0; i < nports; i++) {
774 		/* add port to group */
775 		if ((rc = aggr_grp_add_port(grp, ports[i].lp_linkid,
776 		    force, &port)) != 0) {
777 			goto bail;
778 		}
779 		ASSERT(port != NULL);
780 		nadded++;
781 
782 		/* check capabilities */
783 		if (!aggr_grp_capab_check(grp, port) ||
784 		    !aggr_grp_sdu_check(grp, port) ||
785 		    !aggr_grp_margin_check(grp, port)) {
786 			rc = ENOTSUP;
787 			goto bail;
788 		}
789 
790 		/*
791 		 * Create the pseudo ring for each HW ring of the underlying
792 		 * port.
793 		 */
794 		rc = aggr_add_pseudo_rx_group(port, &grp->lg_rx_group);
795 		if (rc != 0)
796 			goto bail;
797 
798 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
799 
800 		/* set LACP mode */
801 		aggr_port_lacp_set_mode(grp, port);
802 
803 		/* start port if group has already been started */
804 		if (grp->lg_started) {
805 			rc = aggr_port_start(port);
806 			if (rc != 0) {
807 				mac_perim_exit(pmph);
808 				goto bail;
809 			}
810 
811 			/*
812 			 * Turn on the promiscuous mode over the port when it
813 			 * is requested to be turned on to receive the
814 			 * non-primary address over a port, or the promiscous
815 			 * mode is enabled over the aggr.
816 			 */
817 			if (grp->lg_promisc || port->lp_prom_addr != NULL) {
818 				rc = aggr_port_promisc(port, B_TRUE);
819 				if (rc != 0) {
820 					mac_perim_exit(pmph);
821 					goto bail;
822 				}
823 			}
824 		}
825 		mac_perim_exit(pmph);
826 
827 		/*
828 		 * Attach each port if necessary.
829 		 */
830 		if (aggr_port_notify_link(grp, port))
831 			link_state_changed = B_TRUE;
832 
833 		/*
834 		 * Initialize the callback functions for this port.
835 		 */
836 		aggr_port_init_callbacks(port);
837 	}
838 
839 	/* update the MAC address of the constituent ports */
840 	if (aggr_grp_update_ports_mac(grp))
841 		link_state_changed = B_TRUE;
842 
843 	if (link_state_changed)
844 		mac_link_update(grp->lg_mh, grp->lg_link_state);
845 
846 bail:
847 	if (rc != 0) {
848 		/* stop and remove ports that have been added */
849 		for (i = 0; i < nadded; i++) {
850 			port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
851 			ASSERT(port != NULL);
852 			if (grp->lg_started) {
853 				mac_perim_enter_by_mh(port->lp_mh, &pmph);
854 				(void) aggr_port_promisc(port, B_FALSE);
855 				aggr_port_stop(port);
856 				mac_perim_exit(pmph);
857 			}
858 			aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
859 			(void) aggr_grp_rem_port(grp, port, NULL, NULL);
860 		}
861 	}
862 
863 	if (rc == 0)
864 		mac_resource_update(grp->lg_mh);
865 	mac_perim_exit(mph);
866 	AGGR_GRP_REFRELE(grp);
867 	return (rc);
868 }
869 
870 static int
871 aggr_grp_modify_common(aggr_grp_t *grp, uint8_t update_mask, uint32_t policy,
872     boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode,
873     aggr_lacp_timer_t lacp_timer)
874 {
875 	boolean_t mac_addr_changed = B_FALSE;
876 	boolean_t link_state_changed = B_FALSE;
877 	mac_perim_handle_t pmph;
878 
879 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
880 
881 	/* validate fixed address if specified */
882 	if ((update_mask & AGGR_MODIFY_MAC) && mac_fixed &&
883 	    ((bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) ||
884 	    (mac_addr[0] & 0x01))) {
885 		return (EINVAL);
886 	}
887 
888 	/* update policy if requested */
889 	if (update_mask & AGGR_MODIFY_POLICY)
890 		aggr_send_update_policy(grp, policy);
891 
892 	/* update unicast MAC address if requested */
893 	if (update_mask & AGGR_MODIFY_MAC) {
894 		if (mac_fixed) {
895 			/* user-supplied MAC address */
896 			grp->lg_mac_addr_port = NULL;
897 			if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) != 0) {
898 				bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
899 				mac_addr_changed = B_TRUE;
900 			}
901 		} else if (grp->lg_addr_fixed) {
902 			/* switch from user-supplied to automatic */
903 			aggr_port_t *port = grp->lg_ports;
904 
905 			mac_perim_enter_by_mh(port->lp_mh, &pmph);
906 			bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
907 			grp->lg_mac_addr_port = port;
908 			mac_addr_changed = B_TRUE;
909 			mac_perim_exit(pmph);
910 		}
911 		grp->lg_addr_fixed = mac_fixed;
912 	}
913 
914 	if (mac_addr_changed)
915 		link_state_changed = aggr_grp_update_ports_mac(grp);
916 
917 	if (update_mask & AGGR_MODIFY_LACP_MODE)
918 		aggr_lacp_update_mode(grp, lacp_mode);
919 
920 	if (update_mask & AGGR_MODIFY_LACP_TIMER)
921 		aggr_lacp_update_timer(grp, lacp_timer);
922 
923 	if (link_state_changed)
924 		mac_link_update(grp->lg_mh, grp->lg_link_state);
925 
926 	if (mac_addr_changed)
927 		mac_unicst_update(grp->lg_mh, grp->lg_addr);
928 
929 	return (0);
930 }
931 
932 /*
933  * Update properties of an existing link aggregation group.
934  */
935 int
936 aggr_grp_modify(datalink_id_t linkid, uint8_t update_mask, uint32_t policy,
937     boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode,
938     aggr_lacp_timer_t lacp_timer)
939 {
940 	aggr_grp_t *grp = NULL;
941 	mac_perim_handle_t mph;
942 	int err;
943 
944 	/* get group corresponding to linkid */
945 	rw_enter(&aggr_grp_lock, RW_READER);
946 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
947 	    (mod_hash_val_t *)&grp) != 0) {
948 		rw_exit(&aggr_grp_lock);
949 		return (ENOENT);
950 	}
951 	AGGR_GRP_REFHOLD(grp);
952 
953 	/*
954 	 * Hold the perimeter so that the aggregation won't be destroyed.
955 	 */
956 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
957 	rw_exit(&aggr_grp_lock);
958 
959 	err = aggr_grp_modify_common(grp, update_mask, policy, mac_fixed,
960 	    mac_addr, lacp_mode, lacp_timer);
961 
962 	mac_perim_exit(mph);
963 	AGGR_GRP_REFRELE(grp);
964 	return (err);
965 }
966 
967 /*
968  * Create a new link aggregation group upon request from administrator.
969  * Returns 0 on success, an errno on failure.
970  */
971 int
972 aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports,
973     laioc_port_t *ports, uint32_t policy, boolean_t mac_fixed, boolean_t force,
974     uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer)
975 {
976 	aggr_grp_t *grp = NULL;
977 	aggr_port_t *port;
978 	mac_register_t *mac;
979 	boolean_t link_state_changed;
980 	mac_perim_handle_t mph;
981 	int err;
982 	int i;
983 
984 	/* need at least one port */
985 	if (nports == 0)
986 		return (EINVAL);
987 
988 	rw_enter(&aggr_grp_lock, RW_WRITER);
989 
990 	/* does a group with the same linkid already exist? */
991 	err = mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
992 	    (mod_hash_val_t *)&grp);
993 	if (err == 0) {
994 		rw_exit(&aggr_grp_lock);
995 		return (EEXIST);
996 	}
997 
998 	grp = kmem_cache_alloc(aggr_grp_cache, KM_SLEEP);
999 
1000 	grp->lg_refs = 1;
1001 	grp->lg_closing = B_FALSE;
1002 	grp->lg_force = force;
1003 	grp->lg_linkid = linkid;
1004 	grp->lg_ifspeed = 0;
1005 	grp->lg_link_state = LINK_STATE_UNKNOWN;
1006 	grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
1007 	grp->lg_started = B_FALSE;
1008 	grp->lg_promisc = B_FALSE;
1009 	grp->lg_lacp_done = B_FALSE;
1010 	grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
1011 	grp->lg_lacp_rx_thread = thread_create(NULL, 0,
1012 	    aggr_lacp_rx_thread, grp, 0, &p0, TS_RUN, minclsyspri);
1013 	bzero(&grp->lg_rx_group, sizeof (aggr_pseudo_rx_group_t));
1014 	aggr_lacp_init_grp(grp);
1015 
1016 	/* add MAC ports to group */
1017 	grp->lg_ports = NULL;
1018 	grp->lg_nports = 0;
1019 	grp->lg_nattached_ports = 0;
1020 	grp->lg_ntx_ports = 0;
1021 
1022 	/*
1023 	 * If key is not specified by the user, allocate the key.
1024 	 */
1025 	if ((key == 0) && ((key = (uint32_t)id_alloc(key_ids)) == 0)) {
1026 		err = ENOMEM;
1027 		goto bail;
1028 	}
1029 	grp->lg_key = key;
1030 
1031 	for (i = 0; i < nports; i++) {
1032 		err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, NULL);
1033 		if (err != 0)
1034 			goto bail;
1035 	}
1036 
1037 	/*
1038 	 * If no explicit MAC address was specified by the administrator,
1039 	 * set it to the MAC address of the first port.
1040 	 */
1041 	grp->lg_addr_fixed = mac_fixed;
1042 	if (grp->lg_addr_fixed) {
1043 		/* validate specified address */
1044 		if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) {
1045 			err = EINVAL;
1046 			goto bail;
1047 		}
1048 		bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
1049 	} else {
1050 		bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
1051 		grp->lg_mac_addr_port = grp->lg_ports;
1052 	}
1053 
1054 	/* set the initial group capabilities */
1055 	aggr_grp_capab_set(grp);
1056 
1057 	if ((mac = mac_alloc(MAC_VERSION)) == NULL) {
1058 		err = ENOMEM;
1059 		goto bail;
1060 	}
1061 	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1062 	mac->m_driver = grp;
1063 	mac->m_dip = aggr_dip;
1064 	mac->m_instance = grp->lg_key > AGGR_MAX_KEY ? (uint_t)-1 : grp->lg_key;
1065 	mac->m_src_addr = grp->lg_addr;
1066 	mac->m_callbacks = &aggr_m_callbacks;
1067 	mac->m_min_sdu = 0;
1068 	mac->m_max_sdu = grp->lg_max_sdu = aggr_grp_max_sdu(grp);
1069 	mac->m_margin = aggr_grp_max_margin(grp);
1070 	mac->m_v12n = MAC_VIRT_LEVEL1;
1071 	err = mac_register(mac, &grp->lg_mh);
1072 	mac_free(mac);
1073 	if (err != 0)
1074 		goto bail;
1075 
1076 	if ((err = dls_devnet_create(grp->lg_mh, grp->lg_linkid)) != 0) {
1077 		(void) mac_unregister(grp->lg_mh);
1078 		grp->lg_mh = NULL;
1079 		goto bail;
1080 	}
1081 
1082 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1083 
1084 	/*
1085 	 * Update the MAC address of the constituent ports.
1086 	 * None of the port is attached at this time, the link state of the
1087 	 * aggregation will not change.
1088 	 */
1089 	link_state_changed = aggr_grp_update_ports_mac(grp);
1090 	ASSERT(!link_state_changed);
1091 
1092 	/* update outbound load balancing policy */
1093 	aggr_send_update_policy(grp, policy);
1094 
1095 	/* set LACP mode */
1096 	aggr_lacp_set_mode(grp, lacp_mode, lacp_timer);
1097 
1098 	/*
1099 	 * Attach each port if necessary.
1100 	 */
1101 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1102 		/*
1103 		 * Create the pseudo ring for each HW ring of the underlying
1104 		 * port. Note that this is done after the aggr registers the
1105 		 * mac.
1106 		 */
1107 		VERIFY(aggr_add_pseudo_rx_group(port, &grp->lg_rx_group) == 0);
1108 		if (aggr_port_notify_link(grp, port))
1109 			link_state_changed = B_TRUE;
1110 
1111 		/*
1112 		 * Initialize the callback functions for this port.
1113 		 */
1114 		aggr_port_init_callbacks(port);
1115 	}
1116 
1117 	if (link_state_changed)
1118 		mac_link_update(grp->lg_mh, grp->lg_link_state);
1119 
1120 	/* add new group to hash table */
1121 	err = mod_hash_insert(aggr_grp_hash, GRP_HASH_KEY(linkid),
1122 	    (mod_hash_val_t)grp);
1123 	ASSERT(err == 0);
1124 	aggr_grp_cnt++;
1125 
1126 	mac_perim_exit(mph);
1127 	rw_exit(&aggr_grp_lock);
1128 	return (0);
1129 
1130 bail:
1131 
1132 	grp->lg_closing = B_TRUE;
1133 
1134 	port = grp->lg_ports;
1135 	while (port != NULL) {
1136 		aggr_port_t *cport;
1137 
1138 		cport = port->lp_next;
1139 		aggr_port_delete(port);
1140 		port = cport;
1141 	}
1142 
1143 	/*
1144 	 * Inform the lacp_rx thread to exit.
1145 	 */
1146 	mutex_enter(&grp->lg_lacp_lock);
1147 	grp->lg_lacp_done = B_TRUE;
1148 	cv_signal(&grp->lg_lacp_cv);
1149 	while (grp->lg_lacp_rx_thread != NULL)
1150 		cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
1151 	mutex_exit(&grp->lg_lacp_lock);
1152 
1153 	rw_exit(&aggr_grp_lock);
1154 	AGGR_GRP_REFRELE(grp);
1155 	return (err);
1156 }
1157 
1158 /*
1159  * Return a pointer to the member of a group with specified linkid.
1160  */
1161 static aggr_port_t *
1162 aggr_grp_port_lookup(aggr_grp_t *grp, datalink_id_t linkid)
1163 {
1164 	aggr_port_t *port;
1165 
1166 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1167 
1168 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1169 		if (port->lp_linkid == linkid)
1170 			break;
1171 	}
1172 
1173 	return (port);
1174 }
1175 
1176 /*
1177  * Stop, detach and remove a port from a link aggregation group.
 *
 * This function itself unlinks the port from grp->lg_ports, detaches it
 * and deletes it; the visible caller, aggr_grp_rem_ports(), stops the
 * port beforehand when the group has been started.
 *
 * The caller must hold the MAC perimeter of the group. The group is
 * expected to have more than one port and not to be closing.
 *
 * Returns 0 on success, or ENOENT if the port is not on the group's port
 * list (in which case nothing is modified). If non-NULL,
 * *mac_addr_changedp is set to B_TRUE when the group's MAC address was
 * replaced by the address of another port, and *link_state_changedp is
 * set to B_TRUE when detaching the port or updating the ports' MAC
 * addresses reported a link state change.
1178  */
1179 static int
1180 aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port,
1181     boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
1182 {
1183 	int rc = 0;
1184 	aggr_port_t **pport;
1185 	boolean_t mac_addr_changed = B_FALSE;
1186 	boolean_t link_state_changed = B_FALSE;
1187 	mac_perim_handle_t mph;
1188 	uint64_t val;
1189 	uint_t i;
1190 	uint_t stat;
1191 
1192 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1193 	ASSERT(grp->lg_nports > 1);
1194 	ASSERT(!grp->lg_closing);
1195 
1196 	/* unlink port */
	/*
	 * pport always points at the link field that references the current
	 * list element, so the final store below removes the port without
	 * needing a "previous" pointer.
	 */
1197 	for (pport = &grp->lg_ports; *pport != port;
1198 	    pport = &(*pport)->lp_next) {
1199 		if (*pport == NULL) {
1200 			rc = ENOENT;
1201 			goto done;
1202 		}
1203 	}
1204 	*pport = port->lp_next;
1205 
1206 	mac_perim_enter_by_mh(port->lp_mh, &mph);
1207 
1208 	/*
1209 	 * If the MAC address of the port being removed was assigned
1210 	 * to the group, update the group MAC address
1211 	 * using the MAC address of a different port.
1212 	 */
1213 	if (!grp->lg_addr_fixed && grp->lg_mac_addr_port == port) {
1214 		/*
1215 		 * Set the MAC address of the group to the
1216 		 * MAC address of its first port.
		 * The port being removed is already unlinked, so
		 * grp->lg_ports is one of the remaining ports here.
1217 		 */
1218 		bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
1219 		grp->lg_mac_addr_port = grp->lg_ports;
1220 		mac_addr_changed = B_TRUE;
1221 	}
1222 
1223 	link_state_changed = aggr_grp_detach_port(grp, port);
1224 
1225 	/*
1226 	 * Add the counter statistics of the ports while it was aggregated
1227 	 * to the group's residual statistics.  This is done by obtaining
1228 	 * the current counter from the underlying MAC then subtracting the
1229 	 * value of the counter at the moment it was added to the
1230 	 * aggregation.
1231 	 */
1232 	for (i = 0; i < MAC_NSTAT; i++) {
1233 		stat = i + MAC_STAT_MIN;
1234 		if (!MAC_STAT_ISACOUNTER(stat))
1235 			continue;
1236 		val = aggr_port_stat(port, stat);
1237 		val -= port->lp_stat[i];
1238 		grp->lg_stat[i] += val;
1239 	}
1240 	for (i = 0; i < ETHER_NSTAT; i++) {
1241 		stat = i + MACTYPE_STAT_MIN;
1242 		if (!ETHER_STAT_ISACOUNTER(stat))
1243 			continue;
1244 		val = aggr_port_stat(port, stat);
1245 		val -= port->lp_ether_stat[i];
1246 		grp->lg_ether_stat[i] += val;
1247 	}
1248 
1249 	grp->lg_nports--;
1250 	mac_perim_exit(mph);
1251 
	/* the port's perimeter has been exited before the port is deleted */
1252 	aggr_port_delete(port);
1253 
1254 	/*
1255 	 * If the group MAC address has changed, update the MAC address of
1256 	 * the remaining constituent ports according to the new MAC
1257 	 * address of the group.
1258 	 */
1259 	if (mac_addr_changed && aggr_grp_update_ports_mac(grp))
1260 		link_state_changed = B_TRUE;
1261 
1262 done:
	/* both out parameters are optional */
1263 	if (mac_addr_changedp != NULL)
1264 		*mac_addr_changedp = mac_addr_changed;
1265 	if (link_state_changedp != NULL)
1266 		*link_state_changedp = link_state_changed;
1267 
1268 	return (rc);
1269 }
1270 
1271 /*
1272  * Remove one or more ports from an existing link aggregation group.
 *
 * linkid identifies the aggregation; ports[0..nports-1] carry the
 * datalink ids (lp_linkid) of the members to remove.
 *
 * Returns 0 on success; ENOENT if no group with this linkid exists or if
 * one of the listed ports is not a member of the group; EINVAL if the
 * request would not leave at least one port in the group; or the error
 * returned by aggr_port_promisc() when promiscuous mode cannot be
 * cleared on one of the ports (no port is removed in that case).
 *
 * NOTE(review): if the same lp_linkid appears twice in ports[], the
 * second lookup in the removal loop would presumably return NULL after
 * the first removal; only an ASSERT guards that case -- confirm that
 * callers reject duplicates.
1273  */
1274 int
1275 aggr_grp_rem_ports(datalink_id_t linkid, uint_t nports, laioc_port_t *ports)
1276 {
1277 	int rc = 0, i;
1278 	aggr_grp_t *grp = NULL;
1279 	aggr_port_t *port;
1280 	boolean_t mac_addr_update = B_FALSE, mac_addr_changed;
1281 	boolean_t link_state_update = B_FALSE, link_state_changed;
1282 	mac_perim_handle_t mph, pmph;
1283 
1284 	/* get group corresponding to linkid */
1285 	rw_enter(&aggr_grp_lock, RW_READER);
1286 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1287 	    (mod_hash_val_t *)&grp) != 0) {
1288 		rw_exit(&aggr_grp_lock);
1289 		return (ENOENT);
1290 	}
1291 	AGGR_GRP_REFHOLD(grp);
1292 
1293 	/*
1294 	 * Hold the perimeter so that the aggregation won't be destroyed.
1295 	 */
1296 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1297 	rw_exit(&aggr_grp_lock);
1298 
1299 	/* we need to keep at least one port per group */
1300 	if (nports >= grp->lg_nports) {
1301 		rc = EINVAL;
1302 		goto bail;
1303 	}
1304 
1305 	/* first verify that all the ports are valid */
1306 	for (i = 0; i < nports; i++) {
1307 		if (aggr_grp_port_lookup(grp, ports[i].lp_linkid) == NULL) {
1308 			/* port not found */
1309 			rc = ENOENT;
1310 			goto bail;
1311 		}
1312 	}
1313 
1314 	/* clear the promiscuous mode for the specified ports */
1315 	for (i = 0; i < nports && rc == 0; i++) {
1316 		/* lookup port */
1317 		port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
1318 		ASSERT(port != NULL);
1319 
1320 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
1321 		rc = aggr_port_promisc(port, B_FALSE);
1322 		mac_perim_exit(pmph);
1323 	}
	/*
	 * Clearing promiscuous mode failed on one of the ports: walk all of
	 * the requested ports again and restore promiscuous mode where it
	 * is still needed, then give up without removing anything.
	 */
1324 	if (rc != 0) {
1325 		for (i = 0; i < nports; i++) {
1326 			port = aggr_grp_port_lookup(grp,
1327 			    ports[i].lp_linkid);
1328 			ASSERT(port != NULL);
1329 
1330 			/*
1331 			 * Turn the promiscuous mode back on if it is required
1332 			 * to receive the non-primary address over a port, or
1333 			 * the promiscuous mode is enabled over the aggr.
1334 			 */
1335 			mac_perim_enter_by_mh(port->lp_mh, &pmph);
1336 			if (port->lp_started && (grp->lg_promisc ||
1337 			    port->lp_prom_addr != NULL)) {
1338 				(void) aggr_port_promisc(port, B_TRUE);
1339 			}
1340 			mac_perim_exit(pmph);
1341 		}
1342 		goto bail;
1343 	}
1344 
1345 	/* remove the specified ports from group */
1346 	for (i = 0; i < nports; i++) {
1347 		/* lookup port */
1348 		port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
1349 		ASSERT(port != NULL);
1350 
1351 		/* stop port if group has already been started */
1352 		if (grp->lg_started) {
1353 			mac_perim_enter_by_mh(port->lp_mh, &pmph);
1354 			aggr_port_stop(port);
1355 			mac_perim_exit(pmph);
1356 		}
1357 
1358 		aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
1359 		/* remove port from group */
1360 		rc = aggr_grp_rem_port(grp, port, &mac_addr_changed,
1361 		    &link_state_changed);
1362 		ASSERT(rc == 0);
		/* remember whether any removal changed the address or link */
1363 		mac_addr_update = mac_addr_update || mac_addr_changed;
1364 		link_state_update = link_state_update || link_state_changed;
1365 	}
1366 
1367 bail:
	/*
	 * Both update flags are still B_FALSE on the error paths above, so
	 * the notifications below are only issued after actual removals.
	 */
1368 	if (mac_addr_update)
1369 		mac_unicst_update(grp->lg_mh, grp->lg_addr);
1370 	if (link_state_update)
1371 		mac_link_update(grp->lg_mh, grp->lg_link_state);
1372 	if (rc == 0)
1373 		mac_resource_update(grp->lg_mh);
1374 
1375 	mac_perim_exit(mph);
1376 	AGGR_GRP_REFRELE(grp);
1377 
1378 	return (rc);
1379 }
1380 
/*
 * Delete the link aggregation group identified by linkid.
 *
 * Returns ENOENT if no such group is in aggr_grp_hash, or the error
 * returned by dls_devnet_destroy() or mac_disable(), in which case the
 * group is left in place. On success the group is removed from the hash,
 * its LACP receive thread is asked to exit and waited for, every port is
 * detached and deleted, the MAC is unregistered, and one reference on
 * the group is released.
 */
1381 int
1382 aggr_grp_delete(datalink_id_t linkid)
1383 {
1384 	aggr_grp_t *grp = NULL;
1385 	aggr_port_t *port, *cport;
1386 	datalink_id_t tmpid;
1387 	mod_hash_val_t val;
1388 	mac_perim_handle_t mph, pmph;
1389 	int err;
1390 
1391 	rw_enter(&aggr_grp_lock, RW_WRITER);
1392 
1393 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1394 	    (mod_hash_val_t *)&grp) != 0) {
1395 		rw_exit(&aggr_grp_lock);
1396 		return (ENOENT);
1397 	}
1398 
1399 	/*
1400 	 * Note that dls_devnet_destroy() must be called before lg_lock is
1401 	 * held. Otherwise, it will deadlock if another thread is in
1402 	 * aggr_m_stat() and thus has a kstat_hold() on the kstats that
1403 	 * dls_devnet_destroy() needs to delete.
	 *
	 * NOTE(review): no lock named lg_lock is taken in this function as
	 * visible here; this comment may predate the MAC perimeter --
	 * confirm.
1404 	 */
1405 	if ((err = dls_devnet_destroy(grp->lg_mh, &tmpid, B_TRUE)) != 0) {
1406 		rw_exit(&aggr_grp_lock);
1407 		return (err);
1408 	}
1409 	ASSERT(linkid == tmpid);
1410 
1411 	/*
1412 	 * Unregister from the MAC service module. Since this can
1413 	 * fail if a client hasn't closed the MAC port, we gracefully
1414 	 * fail the operation.
1415 	 */
1416 	if ((err = mac_disable(grp->lg_mh)) != 0) {
		/* undo the dls_devnet_destroy() above; result is ignored */
1417 		(void) dls_devnet_create(grp->lg_mh, linkid);
1418 		rw_exit(&aggr_grp_lock);
1419 		return (err);
1420 	}
1421 	(void) mod_hash_remove(aggr_grp_hash, GRP_HASH_KEY(linkid), &val);
1422 	ASSERT(grp == (aggr_grp_t *)val);
1423 
1424 	ASSERT(aggr_grp_cnt > 0);
1425 	aggr_grp_cnt--;
1426 	rw_exit(&aggr_grp_lock);
1427 
1428 	/*
1429 	 * Inform the lacp_rx thread to exit.
	 * Then wait on the same condition variable until
	 * lg_lacp_rx_thread has been cleared.
1430 	 */
1431 	mutex_enter(&grp->lg_lacp_lock);
1432 	grp->lg_lacp_done = B_TRUE;
1433 	cv_signal(&grp->lg_lacp_cv);
1434 	while (grp->lg_lacp_rx_thread != NULL)
1435 		cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
1436 	mutex_exit(&grp->lg_lacp_lock);
1437 
1438 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1439 
1440 	grp->lg_closing = B_TRUE;
1441 	/* detach and free MAC ports associated with group */
1442 	port = grp->lg_ports;
1443 	while (port != NULL) {
		/* fetch the successor first; the port is deleted below */
1444 		cport = port->lp_next;
1445 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
1446 		if (grp->lg_started)
1447 			aggr_port_stop(port);
1448 		(void) aggr_grp_detach_port(grp, port);
1449 		mac_perim_exit(pmph);
1450 		aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
1451 		aggr_port_delete(port);
1452 		port = cport;
1453 	}
1454 
1455 	mac_perim_exit(mph);
1456 
1457 	/*
1458 	 * Wait for the port's lacp timer thread and its notification callback
1459 	 * to exit before calling mac_unregister() since both needs to access
1460 	 * the mac perimeter of the grp.
1461 	 */
1462 	aggr_grp_port_wait(grp);
1463 
1464 	VERIFY(mac_unregister(grp->lg_mh) == 0);
1465 	grp->lg_mh = NULL;
1466 
1467 	AGGR_GRP_REFRELE(grp);
1468 	return (0);
1469 }
1470 
1471 void
1472 aggr_grp_free(aggr_grp_t *grp)
1473 {
1474 	ASSERT(grp->lg_refs == 0);
1475 	ASSERT(grp->lg_port_ref == 0);
1476 	if (grp->lg_key > AGGR_MAX_KEY) {
1477 		id_free(key_ids, grp->lg_key);
1478 		grp->lg_key = 0;
1479 	}
1480 	kmem_cache_free(aggr_grp_cache, grp);
1481 }
1482 
1483 int
1484 aggr_grp_info(datalink_id_t linkid, void *fn_arg,
1485     aggr_grp_info_new_grp_fn_t new_grp_fn,
1486     aggr_grp_info_new_port_fn_t new_port_fn)
1487 {
1488 	aggr_grp_t	*grp;
1489 	aggr_port_t	*port;
1490 	mac_perim_handle_t mph, pmph;
1491 	int		rc = 0;
1492 
1493 	rw_enter(&aggr_grp_lock, RW_READER);
1494 
1495 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1496 	    (mod_hash_val_t *)&grp) != 0) {
1497 		rw_exit(&aggr_grp_lock);
1498 		return (ENOENT);
1499 	}
1500 	AGGR_GRP_REFHOLD(grp);
1501 
1502 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1503 	rw_exit(&aggr_grp_lock);
1504 
1505 	rc = new_grp_fn(fn_arg, grp->lg_linkid,
1506 	    (grp->lg_key > AGGR_MAX_KEY) ? 0 : grp->lg_key, grp->lg_addr,
1507 	    grp->lg_addr_fixed, grp->lg_force, grp->lg_tx_policy,
1508 	    grp->lg_nports, grp->lg_lacp_mode, grp->aggr.PeriodicTimer);
1509 
1510 	if (rc != 0)
1511 		goto bail;
1512 
1513 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1514 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
1515 		rc = new_port_fn(fn_arg, port->lp_linkid, port->lp_addr,
1516 		    port->lp_state, &port->lp_lacp.ActorOperPortState);
1517 		mac_perim_exit(pmph);
1518 
1519 		if (rc != 0)
1520 			goto bail;
1521 	}
1522 
1523 bail:
1524 	mac_perim_exit(mph);
1525 	AGGR_GRP_REFRELE(grp);
1526 	return (rc);
1527 }
1528 
/*
 * ioctl entry point of the aggregation MAC. Every request is negatively
 * acknowledged with ENOTSUP via miocnak(); arg is not used.
 */
1529 /*ARGSUSED*/
1530 static void
1531 aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
1532 {
1533 	miocnak(q, mp, 0, ENOTSUP);
1534 }
1535 
1536 static int
1537 aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val)
1538 {
1539 	aggr_port_t	*port;
1540 	uint_t		stat_index;
1541 
1542 	/* We only aggregate counter statistics. */
1543 	if (IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat) ||
1544 	    IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat)) {
1545 		return (ENOTSUP);
1546 	}
1547 
1548 	/*
1549 	 * Counter statistics for a group are computed by aggregating the
1550 	 * counters of the members MACs while they were aggregated, plus
1551 	 * the residual counter of the group itself, which is updated each
1552 	 * time a MAC is removed from the group.
1553 	 */
1554 	*val = 0;
1555 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1556 		/* actual port statistic */
1557 		*val += aggr_port_stat(port, stat);
1558 		/*
1559 		 * minus the port stat when it was added, plus any residual
1560 		 * amount for the group.
1561 		 */
1562 		if (IS_MAC_STAT(stat)) {
1563 			stat_index = stat - MAC_STAT_MIN;
1564 			*val -= port->lp_stat[stat_index];
1565 			*val += grp->lg_stat[stat_index];
1566 		} else if (IS_MACTYPE_STAT(stat)) {
1567 			stat_index = stat - MACTYPE_STAT_MIN;
1568 			*val -= port->lp_ether_stat[stat_index];
1569 			*val += grp->lg_ether_stat[stat_index];
1570 		}
1571 	}
1572 	return (0);
1573 }
1574 
1575 static int
1576 aggr_m_stat(void *arg, uint_t stat, uint64_t *val)
1577 {
1578 	aggr_grp_t		*grp = arg;
1579 	mac_perim_handle_t	mph;
1580 	int			rval = 0;
1581 
1582 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1583 
1584 	switch (stat) {
1585 	case MAC_STAT_IFSPEED:
1586 		*val = grp->lg_ifspeed;
1587 		break;
1588 
1589 	case ETHER_STAT_LINK_DUPLEX:
1590 		*val = grp->lg_link_duplex;
1591 		break;
1592 
1593 	default:
1594 		/*
1595 		 * For all other statistics, we return the aggregated stat
1596 		 * from the underlying ports.  aggr_grp_stat() will set
1597 		 * rval appropriately if the statistic isn't a counter.
1598 		 */
1599 		rval = aggr_grp_stat(grp, stat, val);
1600 	}
1601 
1602 	mac_perim_exit(mph);
1603 	return (rval);
1604 }
1605 
1606 static int
1607 aggr_m_start(void *arg)
1608 {
1609 	aggr_grp_t *grp = arg;
1610 	aggr_port_t *port;
1611 	mac_perim_handle_t mph, pmph;
1612 
1613 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1614 
1615 	/*
1616 	 * Attempts to start all configured members of the group.
1617 	 * Group members will be attached when their link-up notification
1618 	 * is received.
1619 	 */
1620 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1621 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
1622 		if (aggr_port_start(port) != 0) {
1623 			mac_perim_exit(pmph);
1624 			continue;
1625 		}
1626 
1627 		/*
1628 		 * Turn on the promiscuous mode if it is required to receive
1629 		 * the non-primary address over a port, or the promiscous
1630 		 * mode is enabled over the aggr.
1631 		 */
1632 		if (grp->lg_promisc || port->lp_prom_addr != NULL) {
1633 			if (aggr_port_promisc(port, B_TRUE) != 0)
1634 				aggr_port_stop(port);
1635 		}
1636 		mac_perim_exit(pmph);
1637 	}
1638 
1639 	grp->lg_started = B_TRUE;
1640 
1641 	mac_perim_exit(mph);
1642 	return (0);
1643 }
1644 
1645 static void
1646 aggr_m_stop(void *arg)
1647 {
1648 	aggr_grp_t *grp = arg;
1649 	aggr_port_t *port;
1650 	mac_perim_handle_t mph, pmph;
1651 
1652 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1653 
1654 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1655 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
1656 
1657 		/* reset port promiscuous mode */
1658 		(void) aggr_port_promisc(port, B_FALSE);
1659 
1660 		aggr_port_stop(port);
1661 		mac_perim_exit(pmph);
1662 	}
1663 
1664 	grp->lg_started = B_FALSE;
1665 	mac_perim_exit(mph);
1666 }
1667 
/*
 * Promiscuous-mode entry point of the aggregation MAC.
 *
 * When the requested state differs from grp->lg_promisc, the new state
 * is applied to the member ports: turning it off only affects ports
 * that have no lp_prom_addr, and turning it on only affects ports that
 * are started. A port on which the change fails is detached from the
 * group; otherwise an attach is attempted. The link state is re-announced
 * if any detach/attach reported a change.
 *
 * Always returns 0; per-port failures are not reported to the caller.
 */
1668 static int
1669 aggr_m_promisc(void *arg, boolean_t on)
1670 {
1671 	aggr_grp_t *grp = arg;
1672 	aggr_port_t *port;
1673 	boolean_t link_state_changed = B_FALSE;
1674 	mac_perim_handle_t mph, pmph;
1675 
1676 	AGGR_GRP_REFHOLD(grp);
1677 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1678 
1679 	ASSERT(!grp->lg_closing);
1680 
	/* nothing to do if the group is already in the requested state */
1681 	if (on == grp->lg_promisc)
1682 		goto bail;
1683 
1684 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1685 		int	err = 0;
1686 
1687 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
1688 		AGGR_PORT_REFHOLD(port);
		/*
		 * err stays 0 when neither branch applies (turning off a
		 * port that still has lp_prom_addr set, or turning on a
		 * port that is not started).
		 */
1689 		if (!on && (port->lp_prom_addr == NULL))
1690 			err = aggr_port_promisc(port, B_FALSE);
1691 		else if (on && port->lp_started)
1692 			err = aggr_port_promisc(port, B_TRUE);
1693 
1694 		if (err != 0) {
1695 			if (aggr_grp_detach_port(grp, port))
1696 				link_state_changed = B_TRUE;
1697 		} else {
1698 			/*
1699 			 * If a port was detached because of a previous
1700 			 * failure changing the promiscuity, the port
1701 			 * is reattached when it successfully changes
1702 			 * the promiscuity now, and this might cause
1703 			 * the link state of the aggregation to change.
1704 			 */
1705 			if (aggr_grp_attach_port(grp, port))
1706 				link_state_changed = B_TRUE;
1707 		}
1708 		mac_perim_exit(pmph);
1709 		AGGR_PORT_REFRELE(port);
1710 	}
1711 
1712 	grp->lg_promisc = on;
1713 
1714 	if (link_state_changed)
1715 		mac_link_update(grp->lg_mh, grp->lg_link_state);
1716 
1717 bail:
1718 	mac_perim_exit(mph);
1719 	AGGR_GRP_REFRELE(grp);
1720 
1721 	return (0);
1722 }
1723 
/*
 * Rename callback advertised through MAC_CAPAB_AGGR (mca_rename_fn).
 *
 * new_name is the new name of the aggregation link and arg is the
 * aggr_grp_t. For each of the group's lg_nports ports, the MAC client
 * name is rewritten in place as new_name followed by the existing
 * delimiter-and-link-name suffix, then mac_rename_primary() is called on
 * the port's MAC handle.
 */
1724 static void
1725 aggr_grp_port_rename(const char *new_name, void *arg)
1726 {
1727 	/*
1728 	 * aggr port's mac client name is the format of "aggr link name" plus
1729 	 * AGGR_PORT_NAME_DELIMIT plus "underneath link name".
1730 	 */
1731 	int aggr_len, link_len, clnt_name_len, i;
1732 	char *str_end, *str_st, *str_del;
1733 	char aggr_name[MAXNAMELEN];
1734 	char link_name[MAXNAMELEN];
1735 	char *clnt_name;
1736 	aggr_grp_t *aggr_grp = arg;
1737 	aggr_port_t *aggr_port = aggr_grp->lg_ports;
1738 
1739 	for (i = 0; i < aggr_grp->lg_nports; i++) {
1740 		clnt_name = mac_client_name(aggr_port->lp_mch);
1741 		clnt_name_len = strlen(clnt_name);
1742 		str_st = clnt_name;
1743 		str_end = &(clnt_name[clnt_name_len]);
1744 		str_del = strchr(str_st, AGGR_PORT_NAME_DELIMIT);
1745 		ASSERT(str_del != NULL);
		/* aggr_len: bytes before the delimiter */
1746 		aggr_len = (intptr_t)((uintptr_t)str_del - (uintptr_t)str_st);
		/* link_len: bytes from the delimiter up to the terminator */
1747 		link_len = (intptr_t)((uintptr_t)str_end - (uintptr_t)str_del);
1748 		bzero(aggr_name, MAXNAMELEN);
1749 		bzero(link_name, MAXNAMELEN);
		/* aggr_name is filled in but not read again in this function */
1750 		bcopy(clnt_name, aggr_name, aggr_len);
		/* link_name keeps the delimiter; +1 also copies the NUL */
1751 		bcopy(str_del, link_name, link_len + 1);
		/*
		 * The buffer returned by mac_client_name() is overwritten
		 * directly.
		 * NOTE(review): assumes that buffer is at least MAXNAMELEN
		 * bytes and may be modified by this caller -- confirm
		 * against the mac client implementation.
		 */
1752 		bzero(clnt_name, MAXNAMELEN);
1753 		(void) snprintf(clnt_name, MAXNAMELEN, "%s%s", new_name,
1754 		    link_name);
1755 
1756 		(void) mac_rename_primary(aggr_port->lp_mh, NULL);
1757 		aggr_port = aggr_port->lp_next;
1758 	}
1759 }
1760 
1761 /*
1762  * Capability entry point of the aggregation MAC: report whether the
1763  * group supports the capability cap, filling in cap_data where relevant.
 *
 * - MAC_CAPAB_HCKSUM: stores grp->lg_hcksum_txflags in cap_data.
 * - MAC_CAPAB_NO_NATIVEVLAN: B_TRUE when grp->lg_vlan is not set.
 * - MAC_CAPAB_NO_ZCOPY: B_TRUE when grp->lg_zcopy is not set.
 * - MAC_CAPAB_RINGS: only RX rings are described (one static pseudo
 *   group); any other ring type yields B_FALSE.
 * - MAC_CAPAB_AGGR: always B_TRUE; the rename and unicast callbacks are
 *   filled in when cap_data is non-NULL.
 *
 * Any other capability yields B_FALSE.
1764  */
1765 static boolean_t
1766 aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
1767 {
1768 	aggr_grp_t *grp = arg;
1769 
1770 	switch (cap) {
1771 	case MAC_CAPAB_HCKSUM: {
1772 		uint32_t *hcksum_txflags = cap_data;
1773 		*hcksum_txflags = grp->lg_hcksum_txflags;
1774 		break;
1775 	}
1776 	case MAC_CAPAB_NO_NATIVEVLAN:
1777 		return (!grp->lg_vlan);
1778 	case MAC_CAPAB_NO_ZCOPY:
1779 		return (!grp->lg_zcopy);
1780 	case MAC_CAPAB_RINGS: {
1781 		mac_capab_rings_t *cap_rings = cap_data;
1782 
1783 		if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
1784 			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
1785 			cap_rings->mr_rnum = grp->lg_rx_group.arg_ring_cnt;
1786 			cap_rings->mr_rget = aggr_fill_ring;
1787 
1788 			/*
1789 			 * An aggregation advertises only one (pseudo) RX
1790 			 * group, which virtualizes the main/primary group of
1791 			 * the underlying devices.
1792 			 */
1793 			cap_rings->mr_gnum = 1;
1794 			cap_rings->mr_gget = aggr_fill_group;
1795 			cap_rings->mr_gaddring = NULL;
1796 			cap_rings->mr_gremring = NULL;
1797 		} else {
1798 			return (B_FALSE);
1799 		}
1800 		break;
1801 	}
1802 	case MAC_CAPAB_AGGR:
1803 	{
1804 		mac_capab_aggr_t *aggr_cap;
1805 
1806 		if (cap_data != NULL) {
1807 			aggr_cap = cap_data;
1808 			aggr_cap->mca_rename_fn = aggr_grp_port_rename;
1809 			aggr_cap->mca_unicst = aggr_m_unicst;
1810 		}
1811 		return (B_TRUE);
1812 	}
1813 	default:
1814 		return (B_FALSE);
1815 	}
	/* reached by the cases that "break" after filling in cap_data */
1816 	return (B_TRUE);
1817 }
1818 
1819 /*
1820  * Callback funtion for MAC layer to register groups.
1821  */
1822 static void
1823 aggr_fill_group(void *arg, mac_ring_type_t rtype, const int index,
1824     mac_group_info_t *infop, mac_group_handle_t gh)
1825 {
1826 	aggr_grp_t *grp = arg;
1827 	aggr_pseudo_rx_group_t *rx_group;
1828 
1829 	ASSERT(rtype == MAC_RING_TYPE_RX && index == 0);
1830 	rx_group = &grp->lg_rx_group;
1831 	rx_group->arg_gh = gh;
1832 	rx_group->arg_grp = grp;
1833 
1834 	infop->mgi_driver = (mac_group_driver_t)rx_group;
1835 	infop->mgi_start = NULL;
1836 	infop->mgi_stop = NULL;
1837 	infop->mgi_addmac = aggr_addmac;
1838 	infop->mgi_remmac = aggr_remmac;
1839 	infop->mgi_count = rx_group->arg_ring_cnt;
1840 }
1841 
1842 /*
1843  * Callback funtion for MAC layer to register all rings.
1844  */
1845 static void
1846 aggr_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index,
1847     const int index, mac_ring_info_t *infop, mac_ring_handle_t rh)
1848 {
1849 	aggr_grp_t	*grp = arg;
1850 
1851 	switch (rtype) {
1852 	case MAC_RING_TYPE_RX: {
1853 		aggr_pseudo_rx_group_t	*rx_group = &grp->lg_rx_group;
1854 		aggr_pseudo_rx_ring_t	*rx_ring;
1855 		mac_intr_t		aggr_mac_intr;
1856 
1857 		ASSERT(rg_index == 0);
1858 
1859 		ASSERT((index >= 0) && (index < rx_group->arg_ring_cnt));
1860 		rx_ring = rx_group->arg_rings + index;
1861 		rx_ring->arr_rh = rh;
1862 
1863 		/*
1864 		 * Entrypoint to enable interrupt (disable poll) and
1865 		 * disable interrupt (enable poll).
1866 		 */
1867 		aggr_mac_intr.mi_handle = (mac_intr_handle_t)rx_ring;
1868 		aggr_mac_intr.mi_enable = aggr_pseudo_enable_intr;
1869 		aggr_mac_intr.mi_disable = aggr_pseudo_disable_intr;
1870 
1871 		infop->mri_driver = (mac_ring_driver_t)rx_ring;
1872 		infop->mri_start = aggr_pseudo_start_ring;
1873 		infop->mri_stop = aggr_pseudo_stop_ring;
1874 
1875 		infop->mri_intr = aggr_mac_intr;
1876 		infop->mri_poll = aggr_rx_poll;
1877 		break;
1878 	}
1879 	default:
1880 		break;
1881 	}
1882 }
1883 
/*
 * Poll entry point of a pseudo RX ring (installed as mri_poll by
 * aggr_fill_ring()). arg is the aggr_pseudo_rx_ring_t.
 *
 * Packets are fetched from the underlying hardware ring with
 * mac_hwring_poll(). When LACP is off for the group the chain is
 * returned as is. Otherwise slow-protocol frames (ETHERTYPE_SLOW) are
 * taken out of the chain and handed to aggr_recv_lacp(), and all other
 * packets are freed unless the port's collector is enabled. The
 * remaining chain (possibly NULL) is returned.
 */
1884 static mblk_t *
1885 aggr_rx_poll(void *arg, int bytes_to_pickup)
1886 {
1887 	aggr_pseudo_rx_ring_t *rr_ring = arg;
1888 	aggr_port_t *port = rr_ring->arr_port;
1889 	aggr_grp_t *grp = port->lp_grp;
1890 	mblk_t *mp_chain, *mp, **mpp;
1891 
1892 	mp_chain = mac_hwring_poll(rr_ring->arr_hw_rh, bytes_to_pickup);
1893 
1894 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
1895 		return (mp_chain);
1896 
	/*
	 * mpp points at the link (chain head or a b_next field) that
	 * references the packet under examination, so a packet is removed
	 * by storing its successor through mpp, without advancing mpp.
	 */
1897 	mpp = &mp_chain;
1898 	while ((mp = *mpp) != NULL) {
		/* only inspect the header if the first mblk holds all of it */
1899 		if (MBLKL(mp) >= sizeof (struct ether_header)) {
1900 			struct ether_header *ehp;
1901 
1902 			ehp = (struct ether_header *)mp->b_rptr;
1903 			if (ntohs(ehp->ether_type) == ETHERTYPE_SLOW) {
				/* unlink and hand over to LACP processing */
1904 				*mpp = mp->b_next;
1905 				mp->b_next = NULL;
1906 				aggr_recv_lacp(port,
1907 				    (mac_resource_handle_t)rr_ring, mp);
1908 				continue;
1909 			}
1910 		}
1911 
		/* collector disabled: drop the packet */
1912 		if (!port->lp_collector_enabled) {
1913 			*mpp = mp->b_next;
1914 			mp->b_next = NULL;
1915 			freemsg(mp);
1916 			continue;
1917 		}
		/* keep this packet and move on to the next one */
1918 		mpp = &mp->b_next;
1919 	}
1920 	return (mp_chain);
1921 }
1922 
/*
 * Add-MAC-address entry point of the pseudo RX group (installed as
 * mgi_addmac by aggr_fill_group()). arg is the aggr_pseudo_rx_group_t.
 *
 * Returns 0 without doing anything when mac_addr equals the group's own
 * address, EEXIST when the address is already on the group's address
 * list, and otherwise the result of adding the address to every port:
 * 0 on success, or the first aggr_port_addmac() error, in which case the
 * address is removed again from the ports already processed and from
 * the list.
 */
1923 static int
1924 aggr_addmac(void *arg, const uint8_t *mac_addr)
1925 {
1926 	aggr_pseudo_rx_group_t	*rx_group = (aggr_pseudo_rx_group_t *)arg;
1927 	aggr_unicst_addr_t	*addr, **pprev;
1928 	aggr_grp_t		*grp = rx_group->arg_grp;
1929 	aggr_port_t		*port, *p;
1930 	mac_perim_handle_t	mph;
1931 	int			err = 0;
1932 
1933 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1934 
1935 	if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) {
1936 		mac_perim_exit(mph);
1937 		return (0);
1938 	}
1939 
1940 	/*
1941 	 * Insert this mac address into the list of mac addresses owned by
1942 	 * the aggregation pseudo group.
	 * The walk below both rejects duplicates and leaves pprev pointing
	 * at the terminating link, so the new entry is appended at the tail.
1943 	 */
1944 	pprev = &rx_group->arg_macaddr;
1945 	while ((addr = *pprev) != NULL) {
1946 		if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) == 0) {
1947 			mac_perim_exit(mph);
1948 			return (EEXIST);
1949 		}
1950 		pprev = &addr->aua_next;
1951 	}
1952 	addr = kmem_alloc(sizeof (aggr_unicst_addr_t), KM_SLEEP);
1953 	bcopy(mac_addr, addr->aua_addr, ETHERADDRL);
1954 	addr->aua_next = NULL;
1955 	*pprev = addr;
1956 
1957 	for (port = grp->lg_ports; port != NULL; port = port->lp_next)
1958 		if ((err = aggr_port_addmac(port, mac_addr)) != 0)
1959 			break;
1960 
1961 	if (err != 0) {
		/* undo the add on every port that precedes the failing one */
1962 		for (p = grp->lg_ports; p != port; p = p->lp_next)
1963 			aggr_port_remmac(p, mac_addr);
1964 
		/* the entry was appended at the tail, so unlinking is trivial */
1965 		*pprev = NULL;
1966 		kmem_free(addr, sizeof (aggr_unicst_addr_t));
1967 	}
1968 
1969 	mac_perim_exit(mph);
1970 	return (err);
1971 }
1972 
1973 static int
1974 aggr_remmac(void *arg, const uint8_t *mac_addr)
1975 {
1976 	aggr_pseudo_rx_group_t	*rx_group = (aggr_pseudo_rx_group_t *)arg;
1977 	aggr_unicst_addr_t	*addr, **pprev;
1978 	aggr_grp_t		*grp = rx_group->arg_grp;
1979 	aggr_port_t		*port;
1980 	mac_perim_handle_t	mph;
1981 	int			err = 0;
1982 
1983 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1984 
1985 	if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) {
1986 		mac_perim_exit(mph);
1987 		return (0);
1988 	}
1989 
1990 	/*
1991 	 * Insert this mac address into the list of mac addresses owned by
1992 	 * the aggregation pseudo group.
1993 	 */
1994 	pprev = &rx_group->arg_macaddr;
1995 	while ((addr = *pprev) != NULL) {
1996 		if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) != 0) {
1997 			pprev = &addr->aua_next;
1998 			continue;
1999 		}
2000 		break;
2001 	}
2002 	if (addr == NULL) {
2003 		mac_perim_exit(mph);
2004 		return (EINVAL);
2005 	}
2006 
2007 	for (port = grp->lg_ports; port != NULL; port = port->lp_next)
2008 		aggr_port_remmac(port, mac_addr);
2009 
2010 	*pprev = addr->aua_next;
2011 	kmem_free(addr, sizeof (aggr_unicst_addr_t));
2012 
2013 	mac_perim_exit(mph);
2014 	return (err);
2015 }
2016 
2017 /*
2018  * Add or remove the multicast addresses that are defined for the group
2019  * to or from the specified port.
2020  * This function is called before stopping a port, before a port
2021  * is detached from a group, and when attaching a port to a group.
2022  */
2023 void
2024 aggr_grp_multicst_port(aggr_port_t *port, boolean_t add)
2025 {
2026 	aggr_grp_t *grp = port->lp_grp;
2027 
2028 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
2029 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
2030 
2031 	if (!port->lp_started)
2032 		return;
2033 
2034 	mac_multicast_refresh(grp->lg_mh, aggr_port_multicst, port, add);
2035 }
2036 
2037 static int
2038 aggr_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
2039 {
2040 	aggr_grp_t *grp = arg;
2041 	aggr_port_t *port = NULL;
2042 	mac_perim_handle_t mph;
2043 	int err = 0, cerr;
2044 
2045 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
2046 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2047 		if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
2048 			continue;
2049 		cerr = aggr_port_multicst(port, add, addrp);
2050 		if (cerr != 0 && err == 0)
2051 			err = cerr;
2052 	}
2053 	mac_perim_exit(mph);
2054 	return (err);
2055 }
2056 
/*
 * Set the unicast MAC address of the aggregation (advertised as
 * mca_unicst through MAC_CAPAB_AGGR). The request is forwarded, under
 * the group's MAC perimeter, to aggr_grp_modify_common() with
 * AGGR_MODIFY_MAC and B_TRUE for the argument following the policy;
 * its return value is passed back.
 * NOTE(review): B_TRUE presumably marks the address as fixed -- confirm
 * against aggr_grp_modify_common().
 */
2057 static int
2058 aggr_m_unicst(void *arg, const uint8_t *macaddr)
2059 {
2060 	aggr_grp_t *grp = arg;
2061 	mac_perim_handle_t mph;
2062 	int err;
2063 
2064 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
2065 	err = aggr_grp_modify_common(grp, AGGR_MODIFY_MAC, 0, B_TRUE, macaddr,
2066 	    0, 0);
2067 	mac_perim_exit(mph);
2068 	return (err);
2069 }
2070 
2071 /*
2072  * Initialize the capabilities that are advertised for the group
2073  * according to the capabilities of the constituent ports.
2074  */
2075 static void
2076 aggr_grp_capab_set(aggr_grp_t *grp)
2077 {
2078 	uint32_t cksum;
2079 	aggr_port_t *port;
2080 
2081 	ASSERT(grp->lg_mh == NULL);
2082 	ASSERT(grp->lg_ports != NULL);
2083 
2084 	grp->lg_hcksum_txflags = (uint32_t)-1;
2085 	grp->lg_zcopy = B_TRUE;
2086 	grp->lg_vlan = B_TRUE;
2087 
2088 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2089 		if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &cksum))
2090 			cksum = 0;
2091 		grp->lg_hcksum_txflags &= cksum;
2092 
2093 		grp->lg_vlan &=
2094 		    !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL);
2095 
2096 		grp->lg_zcopy &=
2097 		    !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL);
2098 	}
2099 }
2100 
2101 /*
2102  * Checks whether the capabilities of the port being added are compatible
2103  * with the current capabilities of the aggregation.
2104  */
2105 static boolean_t
2106 aggr_grp_capab_check(aggr_grp_t *grp, aggr_port_t *port)
2107 {
2108 	uint32_t hcksum_txflags;
2109 
2110 	ASSERT(grp->lg_ports != NULL);
2111 
2112 	if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL)) &
2113 	    grp->lg_vlan) != grp->lg_vlan) {
2114 		return (B_FALSE);
2115 	}
2116 
2117 	if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL)) &
2118 	    grp->lg_zcopy) != grp->lg_zcopy) {
2119 		return (B_FALSE);
2120 	}
2121 
2122 	if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &hcksum_txflags)) {
2123 		if (grp->lg_hcksum_txflags != 0)
2124 			return (B_FALSE);
2125 	} else if ((hcksum_txflags & grp->lg_hcksum_txflags) !=
2126 	    grp->lg_hcksum_txflags) {
2127 		return (B_FALSE);
2128 	}
2129 
2130 	return (B_TRUE);
2131 }
2132 
2133 /*
2134  * Returns the maximum SDU according to the SDU of the constituent ports.
2135  */
2136 static uint_t
2137 aggr_grp_max_sdu(aggr_grp_t *grp)
2138 {
2139 	uint_t max_sdu = (uint_t)-1;
2140 	aggr_port_t *port;
2141 
2142 	ASSERT(grp->lg_mh == NULL);
2143 	ASSERT(grp->lg_ports != NULL);
2144 
2145 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2146 		uint_t port_sdu_max;
2147 
2148 		mac_sdu_get(port->lp_mh, NULL, &port_sdu_max);
2149 		if (max_sdu > port_sdu_max)
2150 			max_sdu = port_sdu_max;
2151 	}
2152 
2153 	return (max_sdu);
2154 }
2155 
2156 /*
2157  * Checks if the maximum SDU of the specified port is compatible
2158  * with the maximum SDU of the specified aggregation group, returns
2159  * B_TRUE if it is, B_FALSE otherwise.
2160  */
2161 static boolean_t
2162 aggr_grp_sdu_check(aggr_grp_t *grp, aggr_port_t *port)
2163 {
2164 	uint_t port_sdu_max;
2165 
2166 	mac_sdu_get(port->lp_mh, NULL, &port_sdu_max);
2167 	return (port_sdu_max >= grp->lg_max_sdu);
2168 }
2169 
2170 /*
2171  * Returns the maximum margin according to the margin of the constituent ports.
2172  */
2173 static uint32_t
2174 aggr_grp_max_margin(aggr_grp_t *grp)
2175 {
2176 	uint32_t margin = UINT32_MAX;
2177 	aggr_port_t *port;
2178 
2179 	ASSERT(grp->lg_mh == NULL);
2180 	ASSERT(grp->lg_ports != NULL);
2181 
2182 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2183 		if (margin > port->lp_margin)
2184 			margin = port->lp_margin;
2185 	}
2186 
2187 	grp->lg_margin = margin;
2188 	return (margin);
2189 }
2190 
2191 /*
2192  * Checks if the maximum margin of the specified port is compatible
2193  * with the maximum margin of the specified aggregation group, returns
2194  * B_TRUE if it is, B_FALSE otherwise.
2195  */
2196 static boolean_t
2197 aggr_grp_margin_check(aggr_grp_t *grp, aggr_port_t *port)
2198 {
2199 	if (port->lp_margin >= grp->lg_margin)
2200 		return (B_TRUE);
2201 
2202 	/*
2203 	 * See whether the current margin value is allowed to be changed to
2204 	 * the new value.
2205 	 */
2206 	if (!mac_margin_update(grp->lg_mh, port->lp_margin))
2207 		return (B_FALSE);
2208 
2209 	grp->lg_margin = port->lp_margin;
2210 	return (B_TRUE);
2211 }
2212