xref: /titanic_51/usr/src/uts/common/io/aggr/aggr_grp.c (revision e77b06d21580f630e0a7c437495ab283d3672828)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups.
30  *
31  * An instance of the structure aggr_grp_t is allocated for each
32  * link aggregation group. When created, aggr_grp_t objects are
33  * entered into the aggr_grp_hash hash table maintained by the modhash
34  * module. The hash key is the linkid associated with the link
35  * aggregation group.
36  *
37  * A set of MAC ports is associated with each link aggregation group.
38  */
39 
40 #include <sys/types.h>
41 #include <sys/sysmacros.h>
42 #include <sys/conf.h>
43 #include <sys/cmn_err.h>
44 #include <sys/list.h>
45 #include <sys/ksynch.h>
46 #include <sys/kmem.h>
47 #include <sys/stream.h>
48 #include <sys/modctl.h>
49 #include <sys/ddi.h>
50 #include <sys/sunddi.h>
51 #include <sys/atomic.h>
52 #include <sys/stat.h>
53 #include <sys/modhash.h>
54 #include <sys/id_space.h>
55 #include <sys/strsun.h>
56 #include <sys/dlpi.h>
57 #include <sys/dls.h>
58 #include <sys/vlan.h>
59 #include <sys/aggr.h>
60 #include <sys/aggr_impl.h>
61 
/*
 * MAC callback entry points; all of these are referenced by the
 * aggr_m_callbacks vector below.
 */
static int aggr_m_start(void *);
static void aggr_m_stop(void *);
static int aggr_m_promisc(void *, boolean_t);
static int aggr_m_multicst(void *, boolean_t, const uint8_t *);
static int aggr_m_unicst(void *, const uint8_t *);
static int aggr_m_stat(void *, uint_t, uint64_t *);
static void aggr_m_resources(void *);
static void aggr_m_ioctl(void *, queue_t *, mblk_t *);
static boolean_t aggr_m_capab_get(void *, mac_capab_t, void *);

/* File-local helpers for looking up and removing a group's ports. */
static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, datalink_id_t);
static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *,
    boolean_t *);

/*
 * File-local helpers dealing with group/port capabilities, maximum SDU
 * and margin; used when creating a group and when adding ports to it.
 */
static void aggr_grp_capab_set(aggr_grp_t *);
static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *);
static uint_t aggr_grp_max_sdu(aggr_grp_t *);
static uint32_t aggr_grp_max_margin(aggr_grp_t *);
static boolean_t aggr_grp_sdu_check(aggr_grp_t *, aggr_port_t *);
static boolean_t aggr_grp_margin_check(aggr_grp_t *, aggr_port_t *);
81 
/*
 * Module-wide state, set up by aggr_grp_init() and torn down by
 * aggr_grp_fini():
 *
 * aggr_grp_cache	kmem cache from which aggr_grp_t objects are allocated.
 * aggr_grp_hash	hash of existing groups, keyed by the group's linkid.
 * aggr_grp_lock	held in this file around every lookup/update of
 *			aggr_grp_hash and every access to aggr_grp_cnt.
 * aggr_grp_cnt		number of groups currently in aggr_grp_hash.
 * key_ids		id space used to pick a key for groups created
 *			without a user-specified key.
 */
static kmem_cache_t	*aggr_grp_cache;
static mod_hash_t	*aggr_grp_hash;
static krwlock_t	aggr_grp_lock;
static uint_t		aggr_grp_cnt;
static id_space_t	*key_ids;

/* Size passed to mod_hash_create_idhash() for aggr_grp_hash. */
#define	GRP_HASHSZ		64
/* Convert a datalink id into the key type expected by the mod_hash API. */
#define	GRP_HASH_KEY(linkid)	((mod_hash_key_t)(uintptr_t)linkid)

/* All-zero address, used to reject an unset administrator-supplied MAC. */
static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0};
92 
/*
 * Flags stored in the first member of aggr_m_callbacks.
 * NOTE(review): presumably these advertise that the optional resources,
 * ioctl and getcapab callbacks are implemented -- confirm against the
 * mac_callbacks_t definition.
 */
#define	AGGR_M_CALLBACK_FLAGS	(MC_RESOURCES | MC_IOCTL | MC_GETCAPAB)

/*
 * MAC callback vector shared by all aggregations; aggr_grp_create() hands
 * it to the MAC layer through mac_register_t's m_callbacks member.
 */
static mac_callbacks_t aggr_m_callbacks = {
	AGGR_M_CALLBACK_FLAGS,
	aggr_m_stat,
	aggr_m_start,
	aggr_m_stop,
	aggr_m_promisc,
	aggr_m_multicst,
	aggr_m_unicst,
	aggr_m_tx,
	aggr_m_resources,
	aggr_m_ioctl,
	aggr_m_capab_get
};
108 
109 /*ARGSUSED*/
110 static int
111 aggr_grp_constructor(void *buf, void *arg, int kmflag)
112 {
113 	aggr_grp_t *grp = buf;
114 
115 	bzero(grp, sizeof (*grp));
116 	rw_init(&grp->lg_lock, NULL, RW_DRIVER, NULL);
117 	mutex_init(&grp->aggr.gl_lock, NULL, MUTEX_DEFAULT, NULL);
118 
119 	grp->lg_link_state = LINK_STATE_UNKNOWN;
120 
121 	return (0);
122 }
123 
124 /*ARGSUSED*/
125 static void
126 aggr_grp_destructor(void *buf, void *arg)
127 {
128 	aggr_grp_t *grp = buf;
129 
130 	if (grp->lg_tx_ports != NULL) {
131 		kmem_free(grp->lg_tx_ports,
132 		    grp->lg_tx_ports_size * sizeof (aggr_port_t *));
133 	}
134 
135 	mutex_destroy(&grp->aggr.gl_lock);
136 	rw_destroy(&grp->lg_lock);
137 }
138 
139 void
140 aggr_grp_init(void)
141 {
142 	aggr_grp_cache = kmem_cache_create("aggr_grp_cache",
143 	    sizeof (aggr_grp_t), 0, aggr_grp_constructor,
144 	    aggr_grp_destructor, NULL, NULL, NULL, 0);
145 
146 	aggr_grp_hash = mod_hash_create_idhash("aggr_grp_hash",
147 	    GRP_HASHSZ, mod_hash_null_valdtor);
148 	rw_init(&aggr_grp_lock, NULL, RW_DEFAULT, NULL);
149 	aggr_grp_cnt = 0;
150 
151 	/*
152 	 * Allocate an id space to manage key values (when key is not
153 	 * specified). The range of the id space will be from
154 	 * (AGGR_MAX_KEY + 1) to UINT16_MAX, because the LACP protocol
155 	 * uses a 16-bit key.
156 	 */
157 	key_ids = id_space_create("aggr_key_ids", AGGR_MAX_KEY + 1, UINT16_MAX);
158 	ASSERT(key_ids != NULL);
159 }
160 
161 void
162 aggr_grp_fini(void)
163 {
164 	id_space_destroy(key_ids);
165 	rw_destroy(&aggr_grp_lock);
166 	mod_hash_destroy_idhash(aggr_grp_hash);
167 	kmem_cache_destroy(aggr_grp_cache);
168 }
169 
170 uint_t
171 aggr_grp_count(void)
172 {
173 	uint_t	count;
174 
175 	rw_enter(&aggr_grp_lock, RW_READER);
176 	count = aggr_grp_cnt;
177 	rw_exit(&aggr_grp_lock);
178 	return (count);
179 }
180 
181 /*
182  * Attach a port to a link aggregation group.
183  *
184  * A port is attached to a link aggregation group once its speed
185  * and link state have been verified.
186  *
187  * Returns B_TRUE if the group link state or speed has changed. If
188  * it's the case, the caller must notify the MAC layer via a call
189  * to mac_link().
190  */
191 boolean_t
192 aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port)
193 {
194 	boolean_t link_state_changed = B_FALSE;
195 
196 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
197 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
198 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
199 
200 	if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
201 		return (B_FALSE);
202 
203 	/*
204 	 * Validate the MAC port link speed and update the group
205 	 * link speed if needed.
206 	 */
207 	if (port->lp_ifspeed == 0 ||
208 	    port->lp_link_state != LINK_STATE_UP ||
209 	    port->lp_link_duplex != LINK_DUPLEX_FULL) {
210 		/*
211 		 * Can't attach a MAC port with unknown link speed,
212 		 * down link, or not in full duplex mode.
213 		 */
214 		return (B_FALSE);
215 	}
216 
217 	if (grp->lg_ifspeed == 0) {
218 		/*
219 		 * The group inherits the speed of the first link being
220 		 * attached.
221 		 */
222 		grp->lg_ifspeed = port->lp_ifspeed;
223 		link_state_changed = B_TRUE;
224 	} else if (grp->lg_ifspeed != port->lp_ifspeed) {
225 		/*
226 		 * The link speed of the MAC port must be the same as
227 		 * the group link speed, as per 802.3ad. Since it is
228 		 * not, the attach is cancelled.
229 		 */
230 		return (B_FALSE);
231 	}
232 
233 	grp->lg_nattached_ports++;
234 
235 	/*
236 	 * Update the group link state.
237 	 */
238 	if (grp->lg_link_state != LINK_STATE_UP) {
239 		grp->lg_link_state = LINK_STATE_UP;
240 		grp->lg_link_duplex = LINK_DUPLEX_FULL;
241 		link_state_changed = B_TRUE;
242 	}
243 
244 	aggr_grp_multicst_port(port, B_TRUE);
245 
246 	/*
247 	 * Update port's state.
248 	 */
249 	port->lp_state = AGGR_PORT_STATE_ATTACHED;
250 
251 	/*
252 	 * Set port's receive callback
253 	 */
254 	port->lp_mrh = mac_rx_add(port->lp_mh, aggr_recv_cb, (void *)port);
255 
256 	/*
257 	 * If LACP is OFF, the port can be used to send data as soon
258 	 * as its link is up and verified to be compatible with the
259 	 * aggregation.
260 	 *
261 	 * If LACP is active or passive, notify the LACP subsystem, which
262 	 * will enable sending on the port following the LACP protocol.
263 	 */
264 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
265 		aggr_send_port_enable(port);
266 	else
267 		aggr_lacp_port_attached(port);
268 
269 	return (link_state_changed);
270 }
271 
272 boolean_t
273 aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port)
274 {
275 	boolean_t link_state_changed = B_FALSE;
276 
277 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
278 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
279 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
280 
281 	/* update state */
282 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
283 		return (B_FALSE);
284 
285 	mac_rx_remove(port->lp_mh, port->lp_mrh, B_FALSE);
286 	port->lp_state = AGGR_PORT_STATE_STANDBY;
287 
288 	aggr_grp_multicst_port(port, B_FALSE);
289 
290 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
291 		aggr_send_port_disable(port);
292 	else
293 		aggr_lacp_port_detached(port);
294 
295 	grp->lg_nattached_ports--;
296 	if (grp->lg_nattached_ports == 0) {
297 		/* the last attached MAC port of the group is being detached */
298 		grp->lg_ifspeed = 0;
299 		grp->lg_link_state = LINK_STATE_DOWN;
300 		grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
301 		link_state_changed = B_TRUE;
302 	}
303 
304 	return (link_state_changed);
305 }
306 
307 /*
308  * Update the MAC addresses of the constituent ports of the specified
309  * group. This function is invoked:
310  * - after creating a new aggregation group.
311  * - after adding new ports to an aggregation group.
312  * - after removing a port from a group when the MAC address of
313  *   that port was used for the MAC address of the group.
314  * - after the MAC address of a port changed when the MAC address
315  *   of that port was used for the MAC address of the group.
316  *
317  * Return true if the link state of the aggregation changed, for example
318  * as a result of a failure changing the MAC address of one of the
319  * constituent ports.
320  */
321 boolean_t
322 aggr_grp_update_ports_mac(aggr_grp_t *grp)
323 {
324 	aggr_port_t *cport;
325 	boolean_t link_state_changed = B_FALSE;
326 
327 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
328 
329 	if (grp->lg_closing)
330 		return (link_state_changed);
331 
332 	for (cport = grp->lg_ports; cport != NULL;
333 	    cport = cport->lp_next) {
334 		rw_enter(&cport->lp_lock, RW_WRITER);
335 		if (aggr_port_unicst(cport, grp->lg_addr) != 0) {
336 			if (aggr_grp_detach_port(grp, cport))
337 				link_state_changed = B_TRUE;
338 		} else {
339 			/*
340 			 * If a port was detached because of a previous
341 			 * failure changing the MAC address, the port is
342 			 * reattached when it successfully changes the MAC
343 			 * address now, and this might cause the link state
344 			 * of the aggregation to change.
345 			 */
346 			if (aggr_grp_attach_port(grp, cport))
347 				link_state_changed = B_TRUE;
348 		}
349 		rw_exit(&cport->lp_lock);
350 	}
351 	return (link_state_changed);
352 }
353 
354 /*
355  * Invoked when the MAC address of a port has changed. If the port's
356  * MAC address was used for the group MAC address, set mac_addr_changedp
357  * to B_TRUE to indicate to the caller that it should send a MAC_NOTE_UNICST
358  * notification. If the link state changes due to detach/attach of
359  * the constituent port, set link_state_changedp to B_TRUE to indicate
360  * to the caller that it should send a MAC_NOTE_LINK notification. In both
361  * cases, it is the responsibility of the caller to invoke notification
362  * functions after releasing the the port lock.
363  */
364 void
365 aggr_grp_port_mac_changed(aggr_grp_t *grp, aggr_port_t *port,
366     boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
367 {
368 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
369 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
370 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
371 	ASSERT(mac_addr_changedp != NULL);
372 	ASSERT(link_state_changedp != NULL);
373 
374 	*mac_addr_changedp = B_FALSE;
375 	*link_state_changedp = B_FALSE;
376 
377 	if (grp->lg_addr_fixed) {
378 		/*
379 		 * The group is using a fixed MAC address or an automatic
380 		 * MAC address has not been set.
381 		 */
382 		return;
383 	}
384 
385 	if (grp->lg_mac_addr_port == port) {
386 		/*
387 		 * The MAC address of the port was assigned to the group
388 		 * MAC address. Update the group MAC address.
389 		 */
390 		bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
391 		*mac_addr_changedp = B_TRUE;
392 	} else {
393 		/*
394 		 * Update the actual port MAC address to the MAC address
395 		 * of the group.
396 		 */
397 		if (aggr_port_unicst(port, grp->lg_addr) != 0) {
398 			*link_state_changedp = aggr_grp_detach_port(grp, port);
399 		} else {
400 			/*
401 			 * If a port was detached because of a previous
402 			 * failure changing the MAC address, the port is
403 			 * reattached when it successfully changes the MAC
404 			 * address now, and this might cause the link state
405 			 * of the aggregation to change.
406 			 */
407 			*link_state_changedp = aggr_grp_attach_port(grp, port);
408 		}
409 	}
410 }
411 
412 /*
413  * Add a port to a link aggregation group.
414  */
415 static int
416 aggr_grp_add_port(aggr_grp_t *grp, datalink_id_t linkid, boolean_t force,
417     aggr_port_t **pp)
418 {
419 	aggr_port_t *port, **cport;
420 	int err;
421 
422 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
423 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
424 
425 	/* create new port */
426 	err = aggr_port_create(linkid, force, &port);
427 	if (err != 0)
428 		return (err);
429 
430 	rw_enter(&port->lp_lock, RW_WRITER);
431 
432 	/* add port to list of group constituent ports */
433 	cport = &grp->lg_ports;
434 	while (*cport != NULL)
435 		cport = &((*cport)->lp_next);
436 	*cport = port;
437 
438 	/*
439 	 * Back reference to the group it is member of. A port always
440 	 * holds a reference to its group to ensure that the back
441 	 * reference is always valid.
442 	 */
443 	port->lp_grp = grp;
444 	AGGR_GRP_REFHOLD(grp);
445 	grp->lg_nports++;
446 
447 	aggr_lacp_init_port(port);
448 
449 	/*
450 	 * Initialize the callback functions for this port. Note that this
451 	 * can only be done after the lp_grp field is set.
452 	 */
453 	aggr_port_init_callbacks(port);
454 
455 	rw_exit(&port->lp_lock);
456 
457 	if (pp != NULL)
458 		*pp = port;
459 
460 	return (0);
461 }
462 
463 /*
464  * Add one or more ports to an existing link aggregation group.
465  */
466 int
467 aggr_grp_add_ports(datalink_id_t linkid, uint_t nports, boolean_t force,
468     laioc_port_t *ports)
469 {
470 	int rc, i, nadded = 0;
471 	aggr_grp_t *grp = NULL;
472 	aggr_port_t *port;
473 	boolean_t link_state_changed = B_FALSE;
474 
475 	/* get group corresponding to linkid */
476 	rw_enter(&aggr_grp_lock, RW_READER);
477 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
478 	    (mod_hash_val_t *)&grp) != 0) {
479 		rw_exit(&aggr_grp_lock);
480 		return (ENOENT);
481 	}
482 	AGGR_GRP_REFHOLD(grp);
483 	rw_exit(&aggr_grp_lock);
484 
485 	AGGR_LACP_LOCK(grp);
486 	rw_enter(&grp->lg_lock, RW_WRITER);
487 
488 	/* add the specified ports to group */
489 	for (i = 0; i < nports; i++) {
490 		/* add port to group */
491 		if ((rc = aggr_grp_add_port(grp, ports[i].lp_linkid,
492 		    force, &port)) != 0) {
493 			goto bail;
494 		}
495 		ASSERT(port != NULL);
496 		nadded++;
497 
498 		/* check capabilities */
499 		if (!aggr_grp_capab_check(grp, port) ||
500 		    !aggr_grp_sdu_check(grp, port) ||
501 		    !aggr_grp_margin_check(grp, port)) {
502 			rc = ENOTSUP;
503 			goto bail;
504 		}
505 
506 		/* start port if group has already been started */
507 		if (grp->lg_started) {
508 			rw_enter(&port->lp_lock, RW_WRITER);
509 			rc = aggr_port_start(port);
510 			if (rc != 0) {
511 				rw_exit(&port->lp_lock);
512 				goto bail;
513 			}
514 
515 			/* set port promiscuous mode */
516 			rc = aggr_port_promisc(port, grp->lg_promisc);
517 			if (rc != 0) {
518 				rw_exit(&port->lp_lock);
519 				goto bail;
520 			}
521 			rw_exit(&port->lp_lock);
522 		}
523 
524 		/*
525 		 * Attach each port if necessary.
526 		 */
527 		if (aggr_port_notify_link(grp, port, B_FALSE))
528 			link_state_changed = B_TRUE;
529 	}
530 
531 	/* update the MAC address of the constituent ports */
532 	if (aggr_grp_update_ports_mac(grp))
533 		link_state_changed = B_TRUE;
534 
535 	if (link_state_changed)
536 		mac_link_update(grp->lg_mh, grp->lg_link_state);
537 
538 bail:
539 	if (rc != 0) {
540 		/* stop and remove ports that have been added */
541 		for (i = 0; i < nadded && !grp->lg_closing; i++) {
542 			port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
543 			ASSERT(port != NULL);
544 			if (grp->lg_started) {
545 				rw_enter(&port->lp_lock, RW_WRITER);
546 				aggr_port_stop(port);
547 				rw_exit(&port->lp_lock);
548 			}
549 			(void) aggr_grp_rem_port(grp, port, NULL, NULL);
550 		}
551 	}
552 
553 	rw_exit(&grp->lg_lock);
554 	AGGR_LACP_UNLOCK(grp);
555 	if (rc == 0 && !grp->lg_closing)
556 		mac_resource_update(grp->lg_mh);
557 	AGGR_GRP_REFRELE(grp);
558 	return (rc);
559 }
560 
/*
 * Update properties of an existing link aggregation group.
 *
 * The group is either looked up by linkid (grp_arg == NULL), in which
 * case this function takes aggr_grp_lock as reader, the LACP lock and
 * the group lock as writer itself, or passed directly through grp_arg,
 * in which case the caller must already hold the LACP lock and the group
 * lock as writer.
 *
 * update_mask selects which of the remaining arguments are applied:
 * AGGR_MODIFY_POLICY (policy), AGGR_MODIFY_MAC (mac_fixed, mac_addr),
 * AGGR_MODIFY_LACP_MODE (lacp_mode) and AGGR_MODIFY_LACP_TIMER
 * (lacp_timer).
 *
 * Returns 0 on success, ENOENT if no group exists for linkid, or EINVAL
 * if a fixed MAC address is requested and the address is all zeroes or
 * has the low-order bit of its first byte set.
 */
int
aggr_grp_modify(datalink_id_t linkid, aggr_grp_t *grp_arg, uint8_t update_mask,
    uint32_t policy, boolean_t mac_fixed, const uchar_t *mac_addr,
    aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer)
{
	int rc = 0;
	aggr_grp_t *grp = NULL;
	boolean_t mac_addr_changed = B_FALSE;
	boolean_t link_state_changed = B_FALSE;

	if (grp_arg == NULL) {
		/* get group corresponding to linkid */
		rw_enter(&aggr_grp_lock, RW_READER);
		if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
		    (mod_hash_val_t *)&grp) != 0) {
			/*
			 * aggr_grp_lock is released at bail.
			 * NOTE(review): this relies on grp still being
			 * NULL after a failed mod_hash_find() -- confirm.
			 */
			rc = ENOENT;
			goto bail;
		}
		AGGR_LACP_LOCK(grp);
		rw_enter(&grp->lg_lock, RW_WRITER);
	} else {
		grp = grp_arg;
		ASSERT(AGGR_LACP_LOCK_HELD(grp));
		ASSERT(RW_WRITE_HELD(&grp->lg_lock));
	}

	ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock));
	/* reference dropped again at the end of this function */
	AGGR_GRP_REFHOLD(grp);

	/* validate fixed address if specified */
	if ((update_mask & AGGR_MODIFY_MAC) && mac_fixed &&
	    ((bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) ||
	    (mac_addr[0] & 0x01))) {
		rc = EINVAL;
		goto bail;
	}

	/* update policy if requested */
	if (update_mask & AGGR_MODIFY_POLICY)
		aggr_send_update_policy(grp, policy);

	/* update unicast MAC address if requested */
	if (update_mask & AGGR_MODIFY_MAC) {
		if (mac_fixed) {
			/* user-supplied MAC address */
			grp->lg_mac_addr_port = NULL;
			if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) != 0) {
				bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
				mac_addr_changed = B_TRUE;
			}
		} else if (grp->lg_addr_fixed) {
			/* switch from user-supplied to automatic */
			/*
			 * The group takes the address of its first port.
			 * NOTE(review): lg_ports is dereferenced without a
			 * NULL check; this assumes a group always has at
			 * least one port.
			 */
			aggr_port_t *port = grp->lg_ports;

			rw_enter(&port->lp_lock, RW_WRITER);
			bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
			grp->lg_mac_addr_port = port;
			mac_addr_changed = B_TRUE;
			rw_exit(&port->lp_lock);
		}
		grp->lg_addr_fixed = mac_fixed;
	}

	/* propagate a new group address to the constituent ports */
	if (mac_addr_changed)
		link_state_changed = aggr_grp_update_ports_mac(grp);

	if (update_mask & AGGR_MODIFY_LACP_MODE)
		aggr_lacp_update_mode(grp, lacp_mode);

	if ((update_mask & AGGR_MODIFY_LACP_TIMER) && !grp->lg_closing)
		aggr_lacp_update_timer(grp, lacp_timer);

bail:
	/* notifications are sent before the group lock is released below */
	if (grp != NULL && !grp->lg_closing) {
		/*
		 * If grp_arg is non-NULL, this function is called from
		 * mac_unicst_set(), and the MAC_NOTE_UNICST notification
		 * will be sent there.
		 */
		if ((grp_arg == NULL) && mac_addr_changed)
			mac_unicst_update(grp->lg_mh, grp->lg_addr);

		if (link_state_changed)
			mac_link_update(grp->lg_mh, grp->lg_link_state);

	}

	/* release only the locks which were acquired by this function */
	if (grp_arg == NULL) {
		if (grp != NULL) {
			rw_exit(&grp->lg_lock);
			AGGR_LACP_UNLOCK(grp);
		}
		rw_exit(&aggr_grp_lock);
	}

	if (grp != NULL)
		AGGR_GRP_REFRELE(grp);

	return (rc);
}
664 
/*
 * Create a new link aggregation group upon request from administrator.
 *
 * linkid is the datalink id of the new group and key its key; a key of 0
 * asks for a key to be allocated from key_ids. ports[0 .. nports - 1]
 * name the links to aggregate (at least one is required). If mac_fixed
 * is set the group uses mac_addr, otherwise it uses the MAC address of
 * its first port. policy, lacp_mode and lacp_timer set the initial
 * transmit policy and LACP configuration; force is recorded in the group
 * and passed on when the ports are created.
 *
 * aggr_grp_lock is held as writer for the whole operation.
 *
 * Returns 0 on success, an errno on failure: EINVAL if nports is 0 or
 * the fixed MAC address is all zeroes, EEXIST if a group with the same
 * linkid exists, ENOMEM if a key or the mac_register_t can't be
 * allocated, or the error returned while adding a port, registering
 * with the MAC layer or creating the devnet entry.
 */
int
aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports,
    laioc_port_t *ports, uint32_t policy, boolean_t mac_fixed, boolean_t force,
    uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer)
{
	aggr_grp_t *grp = NULL;
	aggr_port_t *port;
	mac_register_t *mac;
	boolean_t link_state_changed;
	int err;
	int i;

	/* need at least one port */
	if (nports == 0)
		return (EINVAL);

	rw_enter(&aggr_grp_lock, RW_WRITER);

	/* does a group with the same linkid already exist? */
	err = mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
	    (mod_hash_val_t *)&grp);
	if (err == 0) {
		rw_exit(&aggr_grp_lock);
		return (EEXIST);
	}

	grp = kmem_cache_alloc(aggr_grp_cache, KM_SLEEP);

	AGGR_LACP_LOCK(grp);
	rw_enter(&grp->lg_lock, RW_WRITER);

	/* the initial reference is dropped by aggr_grp_delete() or at bail */
	grp->lg_refs = 1;
	grp->lg_closing = B_FALSE;
	grp->lg_force = force;
	grp->lg_linkid = linkid;
	grp->lg_ifspeed = 0;
	grp->lg_link_state = LINK_STATE_UNKNOWN;
	grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
	grp->lg_started = B_FALSE;
	grp->lg_promisc = B_FALSE;
	aggr_lacp_init_grp(grp);

	/* add MAC ports to group */
	grp->lg_ports = NULL;
	grp->lg_nports = 0;
	grp->lg_nattached_ports = 0;
	grp->lg_ntx_ports = 0;

	/*
	 * If key is not specified by the user, allocate the key.
	 */
	if ((key == 0) && ((key = (uint32_t)id_alloc(key_ids)) == 0)) {
		err = ENOMEM;
		goto bail;
	}
	grp->lg_key = key;

	for (i = 0; i < nports; i++) {
		err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, NULL);
		if (err != 0)
			goto bail;
	}

	/*
	 * If no explicit MAC address was specified by the administrator,
	 * set it to the MAC address of the first port.
	 */
	grp->lg_addr_fixed = mac_fixed;
	if (grp->lg_addr_fixed) {
		/* validate specified address */
		/*
		 * NOTE(review): only the all-zero address is rejected
		 * here, whereas aggr_grp_modify() also rejects addresses
		 * with the low-order bit of the first byte set -- confirm
		 * whether the difference is intended.
		 */
		if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) {
			err = EINVAL;
			goto bail;
		}
		bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
	} else {
		bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
		grp->lg_mac_addr_port = grp->lg_ports;
	}

	/*
	 * Update the MAC address of the constituent ports.
	 * None of the port is attached at this time, the link state of the
	 * aggregation will not change.
	 */
	link_state_changed = aggr_grp_update_ports_mac(grp);
	ASSERT(!link_state_changed);

	/* update outbound load balancing policy */
	aggr_send_update_policy(grp, policy);

	/* set the initial group capabilities */
	aggr_grp_capab_set(grp);

	/* describe the aggregation to the MAC layer and register it */
	if ((mac = mac_alloc(MAC_VERSION)) == NULL) {
		err = ENOMEM;
		goto bail;
	}
	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	mac->m_driver = grp;
	mac->m_dip = aggr_dip;
	/* automatically allocated keys (> AGGR_MAX_KEY) get instance -1 */
	mac->m_instance = grp->lg_key > AGGR_MAX_KEY ? (uint_t)-1 : grp->lg_key;
	mac->m_src_addr = grp->lg_addr;
	mac->m_callbacks = &aggr_m_callbacks;
	mac->m_min_sdu = 0;
	mac->m_max_sdu = grp->lg_max_sdu = aggr_grp_max_sdu(grp);
	mac->m_margin = aggr_grp_max_margin(grp);
	err = mac_register(mac, &grp->lg_mh);
	/* the mac_register_t is released whether or not registration worked */
	mac_free(mac);
	if (err != 0)
		goto bail;

	if ((err = dls_devnet_create(grp->lg_mh, grp->lg_linkid)) != 0) {
		/* undo the MAC registration before unwinding */
		(void) mac_unregister(grp->lg_mh);
		goto bail;
	}

	/* set LACP mode */
	aggr_lacp_set_mode(grp, lacp_mode, lacp_timer);

	/*
	 * Attach each port if necessary.
	 */
	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
		if (aggr_port_notify_link(grp, port, B_FALSE))
			link_state_changed = B_TRUE;
	}

	if (link_state_changed)
		mac_link_update(grp->lg_mh, grp->lg_link_state);

	/* add new group to hash table */
	err = mod_hash_insert(aggr_grp_hash, GRP_HASH_KEY(linkid),
	    (mod_hash_val_t)grp);
	ASSERT(err == 0);
	aggr_grp_cnt++;

	rw_exit(&grp->lg_lock);
	AGGR_LACP_UNLOCK(grp);
	rw_exit(&aggr_grp_lock);
	return (0);

bail:
	/*
	 * Error unwind: mark the group as closing, delete the ports added
	 * so far, then drop the initial reference taken above.
	 */
	if (grp != NULL) {
		aggr_port_t *cport;

		grp->lg_closing = B_TRUE;

		port = grp->lg_ports;
		while (port != NULL) {
			/* fetch the successor before the port is deleted */
			cport = port->lp_next;
			aggr_port_delete(port);
			port = cport;
		}

		rw_exit(&grp->lg_lock);
		AGGR_LACP_UNLOCK(grp);

		AGGR_GRP_REFRELE(grp);
	}

	rw_exit(&aggr_grp_lock);
	return (err);
}
833 
834 /*
835  * Return a pointer to the member of a group with specified linkid.
836  */
837 static aggr_port_t *
838 aggr_grp_port_lookup(aggr_grp_t *grp, datalink_id_t linkid)
839 {
840 	aggr_port_t *port;
841 
842 	ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock));
843 
844 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
845 		if (port->lp_linkid == linkid)
846 			break;
847 	}
848 
849 	return (port);
850 }
851 
/*
 * Stop, detach and remove a port from a link aggregation group.
 *
 * The port is unlinked from the list of ports of the group, detached,
 * its counters are folded into the residual statistics of the group,
 * and it is finally deleted. If the group was using the MAC address of
 * that port, the group switches to the address of its (new) first port
 * and that address is pushed to the remaining ports.
 *
 * The caller must hold the LACP lock of the group and the group lock as
 * writer; the group must have more than one port and must not be
 * closing.
 *
 * mac_addr_changedp and link_state_changedp are optional (may be NULL);
 * when provided they are set to indicate whether the MAC address and the
 * link state of the group changed.
 *
 * Returns 0 on success, or ENOENT if the port is not in the group.
 */
static int
aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port,
    boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
{
	int rc = 0;
	aggr_port_t **pport;
	boolean_t mac_addr_changed = B_FALSE;
	boolean_t link_state_changed = B_FALSE;
	uint64_t val;
	uint_t i;
	uint_t stat;

	ASSERT(AGGR_LACP_LOCK_HELD(grp));
	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
	ASSERT(grp->lg_nports > 1);
	ASSERT(!grp->lg_closing);

	/* unlink port */
	/* pport ends up at the link (list head or lp_next) pointing to port */
	for (pport = &grp->lg_ports; *pport != port;
	    pport = &(*pport)->lp_next) {
		if (*pport == NULL) {
			rc = ENOENT;
			goto done;
		}
	}
	*pport = port->lp_next;

	/* flag the port as closing before taking its lock */
	atomic_add_32(&port->lp_closing, 1);

	rw_enter(&port->lp_lock, RW_WRITER);

	/*
	 * If the MAC address of the port being removed was assigned
	 * to the group, update the group MAC address
	 * using the MAC address of a different port.
	 */
	if (!grp->lg_addr_fixed && grp->lg_mac_addr_port == port) {
		/*
		 * Set the MAC address of the group to the
		 * MAC address of its first port.
		 */
		/* the removed port is already unlinked, so lg_ports != port */
		bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
		grp->lg_mac_addr_port = grp->lg_ports;
		mac_addr_changed = B_TRUE;
	}

	link_state_changed = aggr_grp_detach_port(grp, port);

	/*
	 * Add the counter statistics of the ports while it was aggregated
	 * to the group's residual statistics.  This is done by obtaining
	 * the current counter from the underlying MAC then subtracting the
	 * value of the counter at the moment it was added to the
	 * aggregation.
	 */
	for (i = 0; i < MAC_NSTAT && !grp->lg_closing; i++) {
		stat = i + MAC_STAT_MIN;
		if (!MAC_STAT_ISACOUNTER(stat))
			continue;
		val = aggr_port_stat(port, stat);
		val -= port->lp_stat[i];
		grp->lg_stat[i] += val;
	}
	/* same for the Ethernet-specific statistics */
	for (i = 0; i < ETHER_NSTAT && !grp->lg_closing; i++) {
		stat = i + MACTYPE_STAT_MIN;
		if (!ETHER_STAT_ISACOUNTER(stat))
			continue;
		val = aggr_port_stat(port, stat);
		val -= port->lp_ether_stat[i];
		grp->lg_ether_stat[i] += val;
	}

	grp->lg_nports--;

	rw_exit(&port->lp_lock);

	/* port must not be used past this point */
	aggr_port_delete(port);

	/*
	 * If the group MAC address has changed, update the MAC address of
	 * the remaining constituent ports according to the new MAC
	 * address of the group.
	 */
	if (mac_addr_changed && aggr_grp_update_ports_mac(grp))
		link_state_changed = B_TRUE;

done:
	/* report the changes to callers which asked for them */
	if (mac_addr_changedp != NULL)
		*mac_addr_changedp = mac_addr_changed;
	if (link_state_changedp != NULL)
		*link_state_changedp = link_state_changed;

	return (rc);
}
949 
950 /*
951  * Remove one or more ports from an existing link aggregation group.
952  */
953 int
954 aggr_grp_rem_ports(datalink_id_t linkid, uint_t nports, laioc_port_t *ports)
955 {
956 	int rc = 0, i;
957 	aggr_grp_t *grp = NULL;
958 	aggr_port_t *port;
959 	boolean_t mac_addr_update = B_FALSE, mac_addr_changed;
960 	boolean_t link_state_update = B_FALSE, link_state_changed;
961 
962 	/* get group corresponding to linkid */
963 	rw_enter(&aggr_grp_lock, RW_READER);
964 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
965 	    (mod_hash_val_t *)&grp) != 0) {
966 		rw_exit(&aggr_grp_lock);
967 		return (ENOENT);
968 	}
969 	AGGR_GRP_REFHOLD(grp);
970 	rw_exit(&aggr_grp_lock);
971 
972 	AGGR_LACP_LOCK(grp);
973 	rw_enter(&grp->lg_lock, RW_WRITER);
974 
975 	/* we need to keep at least one port per group */
976 	if (nports >= grp->lg_nports) {
977 		rc = EINVAL;
978 		goto bail;
979 	}
980 
981 	/* first verify that all the groups are valid */
982 	for (i = 0; i < nports; i++) {
983 		if (aggr_grp_port_lookup(grp, ports[i].lp_linkid) == NULL) {
984 			/* port not found */
985 			rc = ENOENT;
986 			goto bail;
987 		}
988 	}
989 
990 	/* remove the specified ports from group */
991 	for (i = 0; i < nports && !grp->lg_closing; i++) {
992 		/* lookup port */
993 		port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
994 		ASSERT(port != NULL);
995 
996 		/* stop port if group has already been started */
997 		if (grp->lg_started) {
998 			rw_enter(&port->lp_lock, RW_WRITER);
999 			aggr_port_stop(port);
1000 			rw_exit(&port->lp_lock);
1001 		}
1002 
1003 		/* remove port from group */
1004 		rc = aggr_grp_rem_port(grp, port, &mac_addr_changed,
1005 		    &link_state_changed);
1006 		ASSERT(rc == 0);
1007 		mac_addr_update = mac_addr_update || mac_addr_changed;
1008 		link_state_update = link_state_update || link_state_changed;
1009 	}
1010 
1011 bail:
1012 	rw_exit(&grp->lg_lock);
1013 	AGGR_LACP_UNLOCK(grp);
1014 	if (!grp->lg_closing) {
1015 		if (mac_addr_update)
1016 			mac_unicst_update(grp->lg_mh, grp->lg_addr);
1017 		if (link_state_update)
1018 			mac_link_update(grp->lg_mh, grp->lg_link_state);
1019 		if (rc == 0)
1020 			mac_resource_update(grp->lg_mh);
1021 	}
1022 	AGGR_GRP_REFRELE(grp);
1023 
1024 	return (rc);
1025 }
1026 
/*
 * Delete the link aggregation group identified by linkid.
 *
 * The devnet entry of the group is destroyed, the group is disabled and
 * unregistered from the MAC layer, its ports are stopped (if the group
 * was started), detached and deleted, and the group is removed from the
 * hash table. aggr_grp_lock is held as writer for the whole operation.
 *
 * Returns 0 on success, ENOENT if no group exists for linkid, or the
 * error returned by dls_devnet_destroy() or mac_disable(). In the latter
 * case the devnet entry is recreated and the group is left in place.
 */
int
aggr_grp_delete(datalink_id_t linkid)
{
	aggr_grp_t *grp = NULL;
	aggr_port_t *port, *cport;
	datalink_id_t tmpid;
	mod_hash_val_t val;
	int err;

	rw_enter(&aggr_grp_lock, RW_WRITER);

	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
	    (mod_hash_val_t *)&grp) != 0) {
		rw_exit(&aggr_grp_lock);
		return (ENOENT);
	}

	/*
	 * Note that dls_devnet_destroy() must be called before lg_lock is
	 * held. Otherwise, it will deadlock if another thread is in
	 * aggr_m_stat() and thus has a kstat_hold() on the kstats that
	 * dls_devnet_destroy() needs to delete.
	 */
	if ((err = dls_devnet_destroy(grp->lg_mh, &tmpid)) != 0) {
		rw_exit(&aggr_grp_lock);
		return (err);
	}
	ASSERT(linkid == tmpid);

	AGGR_LACP_LOCK(grp);
	rw_enter(&grp->lg_lock, RW_WRITER);

	/*
	 * Unregister from the MAC service module. Since this can
	 * fail if a client hasn't closed the MAC port, we gracefully
	 * fail the operation.
	 */
	grp->lg_closing = B_TRUE;
	if ((err = mac_disable(grp->lg_mh)) != 0) {
		/* back out: clear the closing flag and restore the devnet */
		grp->lg_closing = B_FALSE;
		rw_exit(&grp->lg_lock);
		AGGR_LACP_UNLOCK(grp);

		(void) dls_devnet_create(grp->lg_mh, linkid);
		rw_exit(&aggr_grp_lock);
		return (err);
	}

	/* detach and free MAC ports associated with group */
	port = grp->lg_ports;
	while (port != NULL) {
		/* fetch the successor before the port is deleted */
		cport = port->lp_next;
		rw_enter(&port->lp_lock, RW_WRITER);
		if (grp->lg_started)
			aggr_port_stop(port);
		(void) aggr_grp_detach_port(grp, port);
		rw_exit(&port->lp_lock);
		aggr_port_delete(port);
		port = cport;
	}

	VERIFY(mac_unregister(grp->lg_mh) == 0);

	rw_exit(&grp->lg_lock);
	AGGR_LACP_UNLOCK(grp);

	(void) mod_hash_remove(aggr_grp_hash, GRP_HASH_KEY(linkid), &val);
	ASSERT(grp == (aggr_grp_t *)val);

	ASSERT(aggr_grp_cnt > 0);
	aggr_grp_cnt--;

	rw_exit(&aggr_grp_lock);
	/* drop the reference the group was created with */
	AGGR_GRP_REFRELE(grp);

	return (0);
}
1104 
1105 void
1106 aggr_grp_free(aggr_grp_t *grp)
1107 {
1108 	ASSERT(grp->lg_refs == 0);
1109 	if (grp->lg_key > AGGR_MAX_KEY) {
1110 		id_free(key_ids, grp->lg_key);
1111 		grp->lg_key = 0;
1112 	}
1113 	kmem_cache_free(aggr_grp_cache, grp);
1114 }
1115 
1116 int
1117 aggr_grp_info(datalink_id_t linkid, void *fn_arg,
1118     aggr_grp_info_new_grp_fn_t new_grp_fn,
1119     aggr_grp_info_new_port_fn_t new_port_fn)
1120 {
1121 	aggr_grp_t	*grp;
1122 	aggr_port_t	*port;
1123 	int		rc = 0;
1124 
1125 	rw_enter(&aggr_grp_lock, RW_READER);
1126 
1127 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1128 	    (mod_hash_val_t *)&grp) != 0) {
1129 		rw_exit(&aggr_grp_lock);
1130 		return (ENOENT);
1131 	}
1132 
1133 	rw_enter(&grp->lg_lock, RW_READER);
1134 
1135 	rc = new_grp_fn(fn_arg, grp->lg_linkid,
1136 	    (grp->lg_key > AGGR_MAX_KEY) ? 0 : grp->lg_key, grp->lg_addr,
1137 	    grp->lg_addr_fixed, grp->lg_force, grp->lg_tx_policy,
1138 	    grp->lg_nports, grp->lg_lacp_mode, grp->aggr.PeriodicTimer);
1139 
1140 	if (rc != 0)
1141 		goto bail;
1142 
1143 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1144 		rw_enter(&port->lp_lock, RW_READER);
1145 		rc = new_port_fn(fn_arg, port->lp_linkid, port->lp_addr,
1146 		    port->lp_state, &port->lp_lacp.ActorOperPortState);
1147 		rw_exit(&port->lp_lock);
1148 
1149 		if (rc != 0)
1150 			goto bail;
1151 	}
1152 
1153 bail:
1154 	rw_exit(&grp->lg_lock);
1155 	rw_exit(&aggr_grp_lock);
1156 	return (rc);
1157 }
1158 
1159 static void
1160 aggr_m_resources(void *arg)
1161 {
1162 	aggr_grp_t *grp = arg;
1163 	aggr_port_t *port;
1164 
1165 	/* Call each port's m_resources function */
1166 	for (port = grp->lg_ports; port != NULL; port = port->lp_next)
1167 		mac_resources(port->lp_mh);
1168 }
1169 
1170 /*ARGSUSED*/
1171 static void
1172 aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
1173 {
1174 	miocnak(q, mp, 0, ENOTSUP);
1175 }
1176 
1177 static int
1178 aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val)
1179 {
1180 	aggr_port_t	*port;
1181 	uint_t		stat_index;
1182 
1183 	/* We only aggregate counter statistics. */
1184 	if (IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat) ||
1185 	    IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat)) {
1186 		return (ENOTSUP);
1187 	}
1188 
1189 	/*
1190 	 * Counter statistics for a group are computed by aggregating the
1191 	 * counters of the members MACs while they were aggregated, plus
1192 	 * the residual counter of the group itself, which is updated each
1193 	 * time a MAC is removed from the group.
1194 	 */
1195 	*val = 0;
1196 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1197 		/* actual port statistic */
1198 		*val += aggr_port_stat(port, stat);
1199 		/*
1200 		 * minus the port stat when it was added, plus any residual
1201 		 * amount for the group.
1202 		 */
1203 		if (IS_MAC_STAT(stat)) {
1204 			stat_index = stat - MAC_STAT_MIN;
1205 			*val -= port->lp_stat[stat_index];
1206 			*val += grp->lg_stat[stat_index];
1207 		} else if (IS_MACTYPE_STAT(stat)) {
1208 			stat_index = stat - MACTYPE_STAT_MIN;
1209 			*val -= port->lp_ether_stat[stat_index];
1210 			*val += grp->lg_ether_stat[stat_index];
1211 		}
1212 	}
1213 	return (0);
1214 }
1215 
1216 static int
1217 aggr_m_stat(void *arg, uint_t stat, uint64_t *val)
1218 {
1219 	aggr_grp_t	*grp = arg;
1220 	int		rval = 0;
1221 
1222 	rw_enter(&grp->lg_lock, RW_READER);
1223 
1224 	switch (stat) {
1225 	case MAC_STAT_IFSPEED:
1226 		*val = grp->lg_ifspeed;
1227 		break;
1228 
1229 	case ETHER_STAT_LINK_DUPLEX:
1230 		*val = grp->lg_link_duplex;
1231 		break;
1232 
1233 	default:
1234 		/*
1235 		 * For all other statistics, we return the aggregated stat
1236 		 * from the underlying ports.  aggr_grp_stat() will set
1237 		 * rval appropriately if the statistic isn't a counter.
1238 		 */
1239 		rval = aggr_grp_stat(grp, stat, val);
1240 	}
1241 
1242 	rw_exit(&grp->lg_lock);
1243 	return (rval);
1244 }
1245 
1246 static int
1247 aggr_m_start(void *arg)
1248 {
1249 	aggr_grp_t *grp = arg;
1250 	aggr_port_t *port;
1251 
1252 	AGGR_LACP_LOCK(grp);
1253 	rw_enter(&grp->lg_lock, RW_WRITER);
1254 
1255 	/*
1256 	 * Attempts to start all configured members of the group.
1257 	 * Group members will be attached when their link-up notification
1258 	 * is received.
1259 	 */
1260 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1261 		rw_enter(&port->lp_lock, RW_WRITER);
1262 		if (aggr_port_start(port) != 0) {
1263 			rw_exit(&port->lp_lock);
1264 			continue;
1265 		}
1266 
1267 		/* set port promiscuous mode */
1268 		if (aggr_port_promisc(port, grp->lg_promisc) != 0)
1269 			aggr_port_stop(port);
1270 		rw_exit(&port->lp_lock);
1271 	}
1272 
1273 	grp->lg_started = B_TRUE;
1274 
1275 	rw_exit(&grp->lg_lock);
1276 	AGGR_LACP_UNLOCK(grp);
1277 
1278 	return (0);
1279 }
1280 
1281 static void
1282 aggr_m_stop(void *arg)
1283 {
1284 	aggr_grp_t *grp = arg;
1285 	aggr_port_t *port;
1286 
1287 	rw_enter(&grp->lg_lock, RW_WRITER);
1288 
1289 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1290 		rw_enter(&port->lp_lock, RW_WRITER);
1291 		aggr_port_stop(port);
1292 		rw_exit(&port->lp_lock);
1293 	}
1294 
1295 	grp->lg_started = B_FALSE;
1296 
1297 	rw_exit(&grp->lg_lock);
1298 }
1299 
1300 static int
1301 aggr_m_promisc(void *arg, boolean_t on)
1302 {
1303 	aggr_grp_t *grp = arg;
1304 	aggr_port_t *port;
1305 	boolean_t link_state_changed = B_FALSE;
1306 
1307 	AGGR_LACP_LOCK(grp);
1308 	rw_enter(&grp->lg_lock, RW_WRITER);
1309 	AGGR_GRP_REFHOLD(grp);
1310 
1311 	ASSERT(!grp->lg_closing);
1312 
1313 	if (on == grp->lg_promisc)
1314 		goto bail;
1315 
1316 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1317 		rw_enter(&port->lp_lock, RW_WRITER);
1318 		AGGR_PORT_REFHOLD(port);
1319 		if (port->lp_started) {
1320 			if (aggr_port_promisc(port, on) != 0) {
1321 				if (aggr_grp_detach_port(grp, port))
1322 					link_state_changed = B_TRUE;
1323 			} else {
1324 				/*
1325 				 * If a port was detached because of a previous
1326 				 * failure changing the promiscuity, the port
1327 				 * is reattached when it successfully changes
1328 				 * the promiscuity now, and this might cause
1329 				 * the link state of the aggregation to change.
1330 				 */
1331 				if (aggr_grp_attach_port(grp, port))
1332 					link_state_changed = B_TRUE;
1333 			}
1334 		}
1335 		rw_exit(&port->lp_lock);
1336 		AGGR_PORT_REFRELE(port);
1337 	}
1338 
1339 	grp->lg_promisc = on;
1340 
1341 	if (link_state_changed)
1342 		mac_link_update(grp->lg_mh, grp->lg_link_state);
1343 
1344 bail:
1345 	rw_exit(&grp->lg_lock);
1346 	AGGR_LACP_UNLOCK(grp);
1347 	AGGR_GRP_REFRELE(grp);
1348 
1349 	return (0);
1350 }
1351 
1352 /*
1353  * Initialize the capabilities that are advertised for the group
1354  * according to the capabilities of the constituent ports.
1355  */
1356 static boolean_t
1357 aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
1358 {
1359 	aggr_grp_t *grp = arg;
1360 
1361 	switch (cap) {
1362 	case MAC_CAPAB_HCKSUM: {
1363 		uint32_t *hcksum_txflags = cap_data;
1364 		*hcksum_txflags = grp->lg_hcksum_txflags;
1365 		break;
1366 	}
1367 	case MAC_CAPAB_POLL:
1368 		/*
1369 		 * There's nothing for us to fill in, we simply return
1370 		 * B_TRUE or B_FALSE to represent the group's support
1371 		 * status for this capability.
1372 		 */
1373 		return (grp->lg_gldv3_polling);
1374 	case MAC_CAPAB_NO_NATIVEVLAN:
1375 		return (!grp->lg_vlan);
1376 	case MAC_CAPAB_NO_ZCOPY:
1377 		return (!grp->lg_zcopy);
1378 	default:
1379 		return (B_FALSE);
1380 	}
1381 	return (B_TRUE);
1382 }
1383 
1384 /*
1385  * Add or remove the multicast addresses that are defined for the group
1386  * to or from the specified port.
1387  * This function is called before stopping a port, before a port
1388  * is detached from a group, and when attaching a port to a group.
1389  */
1390 void
1391 aggr_grp_multicst_port(aggr_port_t *port, boolean_t add)
1392 {
1393 	aggr_grp_t *grp = port->lp_grp;
1394 
1395 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
1396 	ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock));
1397 
1398 	if (!port->lp_started)
1399 		return;
1400 
1401 	mac_multicst_refresh(grp->lg_mh, aggr_port_multicst, port, add);
1402 }
1403 
1404 static int
1405 aggr_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
1406 {
1407 	aggr_grp_t *grp = arg;
1408 	aggr_port_t *port = NULL;
1409 	int err = 0, cerr;
1410 
1411 	rw_enter(&grp->lg_lock, RW_WRITER);
1412 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1413 		if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
1414 			continue;
1415 		cerr = aggr_port_multicst(port, add, addrp);
1416 		if (cerr != 0 && err == 0)
1417 			err = cerr;
1418 	}
1419 	rw_exit(&grp->lg_lock);
1420 	return (err);
1421 }
1422 
1423 static int
1424 aggr_m_unicst(void *arg, const uint8_t *macaddr)
1425 {
1426 	aggr_grp_t *grp = arg;
1427 	int rc;
1428 
1429 	AGGR_LACP_LOCK(grp);
1430 	rw_enter(&grp->lg_lock, RW_WRITER);
1431 	rc = aggr_grp_modify(0, grp, AGGR_MODIFY_MAC, 0, B_TRUE, macaddr,
1432 	    0, 0);
1433 	rw_exit(&grp->lg_lock);
1434 	AGGR_LACP_UNLOCK(grp);
1435 
1436 	return (rc);
1437 }
1438 
1439 /*
1440  * Initialize the capabilities that are advertised for the group
1441  * according to the capabilities of the constituent ports.
1442  */
1443 static void
1444 aggr_grp_capab_set(aggr_grp_t *grp)
1445 {
1446 	uint32_t cksum;
1447 	aggr_port_t *port;
1448 
1449 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
1450 	ASSERT(grp->lg_ports != NULL);
1451 
1452 	grp->lg_hcksum_txflags = (uint32_t)-1;
1453 	grp->lg_gldv3_polling = B_TRUE;
1454 	grp->lg_zcopy = B_TRUE;
1455 	grp->lg_vlan = B_TRUE;
1456 
1457 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1458 		if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &cksum))
1459 			cksum = 0;
1460 		grp->lg_hcksum_txflags &= cksum;
1461 
1462 		grp->lg_vlan &=
1463 		    !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL);
1464 
1465 		grp->lg_zcopy &=
1466 		    !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL);
1467 
1468 		grp->lg_gldv3_polling &=
1469 		    mac_capab_get(port->lp_mh, MAC_CAPAB_POLL, NULL);
1470 	}
1471 }
1472 
1473 /*
1474  * Checks whether the capabilities of the port being added are compatible
1475  * with the current capabilities of the aggregation.
1476  */
1477 static boolean_t
1478 aggr_grp_capab_check(aggr_grp_t *grp, aggr_port_t *port)
1479 {
1480 	uint32_t hcksum_txflags;
1481 
1482 	ASSERT(grp->lg_ports != NULL);
1483 
1484 	if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL)) &
1485 	    grp->lg_vlan) != grp->lg_vlan) {
1486 		return (B_FALSE);
1487 	}
1488 
1489 	if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL)) &
1490 	    grp->lg_zcopy) != grp->lg_zcopy) {
1491 		return (B_FALSE);
1492 	}
1493 
1494 	if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &hcksum_txflags)) {
1495 		if (grp->lg_hcksum_txflags != 0)
1496 			return (B_FALSE);
1497 	} else if ((hcksum_txflags & grp->lg_hcksum_txflags) !=
1498 	    grp->lg_hcksum_txflags) {
1499 		return (B_FALSE);
1500 	}
1501 
1502 	if (mac_capab_get(port->lp_mh, MAC_CAPAB_POLL, NULL) !=
1503 	    grp->lg_gldv3_polling) {
1504 		return (B_FALSE);
1505 	}
1506 
1507 	return (B_TRUE);
1508 }
1509 
1510 /*
1511  * Returns the maximum SDU according to the SDU of the constituent ports.
1512  */
1513 static uint_t
1514 aggr_grp_max_sdu(aggr_grp_t *grp)
1515 {
1516 	uint_t max_sdu = (uint_t)-1;
1517 	aggr_port_t *port;
1518 
1519 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
1520 	ASSERT(grp->lg_ports != NULL);
1521 
1522 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1523 		uint_t port_sdu_max;
1524 
1525 		mac_sdu_get(port->lp_mh, NULL, &port_sdu_max);
1526 		if (max_sdu > port_sdu_max)
1527 			max_sdu = port_sdu_max;
1528 	}
1529 
1530 	return (max_sdu);
1531 }
1532 
1533 /*
1534  * Checks if the maximum SDU of the specified port is compatible
1535  * with the maximum SDU of the specified aggregation group, returns
1536  * B_TRUE if it is, B_FALSE otherwise.
1537  */
1538 static boolean_t
1539 aggr_grp_sdu_check(aggr_grp_t *grp, aggr_port_t *port)
1540 {
1541 	uint_t port_sdu_max;
1542 
1543 	mac_sdu_get(port->lp_mh, NULL, &port_sdu_max);
1544 	return (port_sdu_max >= grp->lg_max_sdu);
1545 }
1546 
1547 /*
1548  * Returns the maximum margin according to the margin of the constituent ports.
1549  */
1550 static uint32_t
1551 aggr_grp_max_margin(aggr_grp_t *grp)
1552 {
1553 	uint32_t margin = UINT32_MAX;
1554 	aggr_port_t *port;
1555 
1556 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
1557 	ASSERT(grp->lg_ports != NULL);
1558 
1559 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1560 		if (margin > port->lp_margin)
1561 			margin = port->lp_margin;
1562 	}
1563 
1564 	grp->lg_margin = margin;
1565 	return (margin);
1566 }
1567 
1568 /*
1569  * Checks if the maximum margin of the specified port is compatible
1570  * with the maximum margin of the specified aggregation group, returns
1571  * B_TRUE if it is, B_FALSE otherwise.
1572  */
1573 static boolean_t
1574 aggr_grp_margin_check(aggr_grp_t *grp, aggr_port_t *port)
1575 {
1576 	if (port->lp_margin >= grp->lg_margin)
1577 		return (B_TRUE);
1578 
1579 	/*
1580 	 * See whether the current margin value is allowed to be changed to
1581 	 * the new value.
1582 	 */
1583 	if (!mac_margin_update(grp->lg_mh, port->lp_margin))
1584 		return (B_FALSE);
1585 
1586 	grp->lg_margin = port->lp_margin;
1587 	return (B_TRUE);
1588 }
1589