xref: /illumos-gate/usr/src/uts/common/io/aggr/aggr_grp.c (revision 3a109ad9413b360a5bfa6fa5ddfacef5fd64fe5b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups.
28  *
29  * An instance of the structure aggr_grp_t is allocated for each
30  * link aggregation group. When created, aggr_grp_t objects are
31  * entered into the aggr_grp_hash hash table maintained by the modhash
32  * module. The hash key is the linkid associated with the link
33  * aggregation group.
34  *
35  * A set of MAC ports is associated with each link aggregation group.
36  */
37 
38 #include <sys/types.h>
39 #include <sys/sysmacros.h>
40 #include <sys/conf.h>
41 #include <sys/cmn_err.h>
42 #include <sys/list.h>
43 #include <sys/ksynch.h>
44 #include <sys/kmem.h>
45 #include <sys/stream.h>
46 #include <sys/modctl.h>
47 #include <sys/ddi.h>
48 #include <sys/sunddi.h>
49 #include <sys/atomic.h>
50 #include <sys/stat.h>
51 #include <sys/modhash.h>
52 #include <sys/id_space.h>
53 #include <sys/strsun.h>
54 #include <sys/dlpi.h>
55 #include <sys/dls.h>
56 #include <sys/vlan.h>
57 #include <sys/aggr.h>
58 #include <sys/aggr_impl.h>
59 
60 static int aggr_m_start(void *);
61 static void aggr_m_stop(void *);
62 static int aggr_m_promisc(void *, boolean_t);
63 static int aggr_m_multicst(void *, boolean_t, const uint8_t *);
64 static int aggr_m_unicst(void *, const uint8_t *);
65 static int aggr_m_stat(void *, uint_t, uint64_t *);
66 static void aggr_m_resources(void *);
67 static void aggr_m_ioctl(void *, queue_t *, mblk_t *);
68 static boolean_t aggr_m_capab_get(void *, mac_capab_t, void *);
69 static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, datalink_id_t);
70 static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *,
71     boolean_t *);
72 
73 static void aggr_grp_capab_set(aggr_grp_t *);
74 static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *);
75 static uint_t aggr_grp_max_sdu(aggr_grp_t *);
76 static uint32_t aggr_grp_max_margin(aggr_grp_t *);
77 static boolean_t aggr_grp_sdu_check(aggr_grp_t *, aggr_port_t *);
78 static boolean_t aggr_grp_margin_check(aggr_grp_t *, aggr_port_t *);
79 static int aggr_grp_multicst(aggr_grp_t *grp, boolean_t add,
80     const uint8_t *addrp);
81 
82 static kmem_cache_t	*aggr_grp_cache;
83 static mod_hash_t	*aggr_grp_hash;
84 static krwlock_t	aggr_grp_lock;
85 static uint_t		aggr_grp_cnt;
86 static id_space_t	*key_ids;
87 
88 #define	GRP_HASHSZ		64
89 #define	GRP_HASH_KEY(linkid)	((mod_hash_key_t)(uintptr_t)linkid)
90 
91 static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0};
92 
93 #define	AGGR_M_CALLBACK_FLAGS	(MC_RESOURCES | MC_IOCTL | MC_GETCAPAB)
94 
95 static mac_callbacks_t aggr_m_callbacks = {
96 	AGGR_M_CALLBACK_FLAGS,
97 	aggr_m_stat,
98 	aggr_m_start,
99 	aggr_m_stop,
100 	aggr_m_promisc,
101 	aggr_m_multicst,
102 	aggr_m_unicst,
103 	aggr_m_tx,
104 	aggr_m_resources,
105 	aggr_m_ioctl,
106 	aggr_m_capab_get
107 };
108 
109 /*ARGSUSED*/
110 static int
111 aggr_grp_constructor(void *buf, void *arg, int kmflag)
112 {
113 	aggr_grp_t *grp = buf;
114 
115 	bzero(grp, sizeof (*grp));
116 	rw_init(&grp->lg_lock, NULL, RW_DRIVER, NULL);
117 	rw_init(&grp->aggr.gl_lock, NULL, RW_DRIVER, NULL);
118 
119 	grp->lg_link_state = LINK_STATE_UNKNOWN;
120 
121 	return (0);
122 }
123 
124 /*ARGSUSED*/
125 static void
126 aggr_grp_destructor(void *buf, void *arg)
127 {
128 	aggr_grp_t *grp = buf;
129 
130 	if (grp->lg_tx_ports != NULL) {
131 		kmem_free(grp->lg_tx_ports,
132 		    grp->lg_tx_ports_size * sizeof (aggr_port_t *));
133 	}
134 
135 	rw_destroy(&grp->aggr.gl_lock);
136 	rw_destroy(&grp->lg_lock);
137 }
138 
139 void
140 aggr_grp_init(void)
141 {
142 	aggr_grp_cache = kmem_cache_create("aggr_grp_cache",
143 	    sizeof (aggr_grp_t), 0, aggr_grp_constructor,
144 	    aggr_grp_destructor, NULL, NULL, NULL, 0);
145 
146 	aggr_grp_hash = mod_hash_create_idhash("aggr_grp_hash",
147 	    GRP_HASHSZ, mod_hash_null_valdtor);
148 	rw_init(&aggr_grp_lock, NULL, RW_DEFAULT, NULL);
149 	aggr_grp_cnt = 0;
150 
151 	/*
152 	 * Allocate an id space to manage key values (when key is not
153 	 * specified). The range of the id space will be from
154 	 * (AGGR_MAX_KEY + 1) to UINT16_MAX, because the LACP protocol
155 	 * uses a 16-bit key.
156 	 */
157 	key_ids = id_space_create("aggr_key_ids", AGGR_MAX_KEY + 1, UINT16_MAX);
158 	ASSERT(key_ids != NULL);
159 }
160 
161 void
162 aggr_grp_fini(void)
163 {
164 	id_space_destroy(key_ids);
165 	rw_destroy(&aggr_grp_lock);
166 	mod_hash_destroy_idhash(aggr_grp_hash);
167 	kmem_cache_destroy(aggr_grp_cache);
168 }
169 
170 uint_t
171 aggr_grp_count(void)
172 {
173 	uint_t	count;
174 
175 	rw_enter(&aggr_grp_lock, RW_READER);
176 	count = aggr_grp_cnt;
177 	rw_exit(&aggr_grp_lock);
178 	return (count);
179 }
180 
181 /*
182  * Attach a port to a link aggregation group.
183  *
184  * A port is attached to a link aggregation group once its speed
185  * and link state have been verified.
186  *
187  * Returns B_TRUE if the group link state or speed has changed. If
188  * it's the case, the caller must notify the MAC layer via a call
189  * to mac_link().
190  */
191 boolean_t
192 aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port)
193 {
194 	boolean_t link_state_changed = B_FALSE;
195 
196 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(grp));
197 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
198 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
199 
200 	if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
201 		return (B_FALSE);
202 
203 	/*
204 	 * Validate the MAC port link speed and update the group
205 	 * link speed if needed.
206 	 */
207 	if (port->lp_ifspeed == 0 ||
208 	    port->lp_link_state != LINK_STATE_UP ||
209 	    port->lp_link_duplex != LINK_DUPLEX_FULL) {
210 		/*
211 		 * Can't attach a MAC port with unknown link speed,
212 		 * down link, or not in full duplex mode.
213 		 */
214 		return (B_FALSE);
215 	}
216 
217 	if (grp->lg_ifspeed == 0) {
218 		/*
219 		 * The group inherits the speed of the first link being
220 		 * attached.
221 		 */
222 		grp->lg_ifspeed = port->lp_ifspeed;
223 		link_state_changed = B_TRUE;
224 	} else if (grp->lg_ifspeed != port->lp_ifspeed) {
225 		/*
226 		 * The link speed of the MAC port must be the same as
227 		 * the group link speed, as per 802.3ad. Since it is
228 		 * not, the attach is cancelled.
229 		 */
230 		return (B_FALSE);
231 	}
232 
233 	grp->lg_nattached_ports++;
234 
235 	/*
236 	 * Update the group link state.
237 	 */
238 	if (grp->lg_link_state != LINK_STATE_UP) {
239 		grp->lg_link_state = LINK_STATE_UP;
240 		grp->lg_link_duplex = LINK_DUPLEX_FULL;
241 		link_state_changed = B_TRUE;
242 	}
243 
244 	aggr_grp_multicst_port(port, B_TRUE);
245 
246 	/*
247 	 * Update port's state.
248 	 */
249 	port->lp_state = AGGR_PORT_STATE_ATTACHED;
250 
251 	/*
252 	 * Set port's receive callback
253 	 */
254 	port->lp_mrh = mac_rx_add(port->lp_mh, aggr_recv_cb, (void *)port);
255 
256 	/*
257 	 * If LACP is OFF, the port can be used to send data as soon
258 	 * as its link is up and verified to be compatible with the
259 	 * aggregation.
260 	 *
261 	 * If LACP is active or passive, notify the LACP subsystem, which
262 	 * will enable sending on the port following the LACP protocol.
263 	 */
264 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
265 		aggr_send_port_enable(port);
266 	else
267 		aggr_lacp_port_attached(port);
268 
269 	return (link_state_changed);
270 }
271 
272 boolean_t
273 aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port, boolean_t port_detach)
274 {
275 	boolean_t link_state_changed = B_FALSE;
276 
277 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
278 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
279 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(grp));
280 
281 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
282 		return (B_FALSE);
283 
284 	mac_rx_remove(port->lp_mh, port->lp_mrh, B_FALSE);
285 
286 	aggr_grp_multicst_port(port, B_FALSE);
287 
288 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
289 		aggr_send_port_disable(port);
290 	else if (port_detach)
291 		aggr_lacp_port_detached(port);
292 
293 	/* update state */
294 	port->lp_state = AGGR_PORT_STATE_STANDBY;
295 	grp->lg_nattached_ports--;
296 	if (grp->lg_nattached_ports == 0) {
297 		/* the last attached MAC port of the group is being detached */
298 		grp->lg_ifspeed = 0;
299 		grp->lg_link_state = LINK_STATE_DOWN;
300 		grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
301 		link_state_changed = B_TRUE;
302 	}
303 
304 	return (link_state_changed);
305 }
306 
307 /*
308  * Update the MAC addresses of the constituent ports of the specified
309  * group. This function is invoked:
310  * - after creating a new aggregation group.
311  * - after adding new ports to an aggregation group.
312  * - after removing a port from a group when the MAC address of
313  *   that port was used for the MAC address of the group.
314  * - after the MAC address of a port changed when the MAC address
315  *   of that port was used for the MAC address of the group.
316  *
317  * Return true if the link state of the aggregation changed, for example
318  * as a result of a failure changing the MAC address of one of the
319  * constituent ports.
320  */
321 boolean_t
322 aggr_grp_update_ports_mac(aggr_grp_t *grp)
323 {
324 	aggr_port_t *cport;
325 	boolean_t link_state_changed = B_FALSE;
326 
327 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
328 
329 	if (grp->lg_closing)
330 		return (link_state_changed);
331 
332 	for (cport = grp->lg_ports; cport != NULL;
333 	    cport = cport->lp_next) {
334 		rw_enter(&cport->lp_lock, RW_WRITER);
335 		if (aggr_port_unicst(cport, grp->lg_addr) != 0) {
336 			if (aggr_grp_detach_port(grp, cport, B_TRUE))
337 				link_state_changed = B_TRUE;
338 		} else {
339 			/*
340 			 * If a port was detached because of a previous
341 			 * failure changing the MAC address, the port is
342 			 * reattached when it successfully changes the MAC
343 			 * address now, and this might cause the link state
344 			 * of the aggregation to change.
345 			 */
346 			if (aggr_grp_attach_port(grp, cport))
347 				link_state_changed = B_TRUE;
348 		}
349 		rw_exit(&cport->lp_lock);
350 	}
351 	return (link_state_changed);
352 }
353 
354 /*
355  * Invoked when the MAC address of a port has changed. If the port's
356  * MAC address was used for the group MAC address, set mac_addr_changedp
357  * to B_TRUE to indicate to the caller that it should send a MAC_NOTE_UNICST
358  * notification. If the link state changes due to detach/attach of
359  * the constituent port, set link_state_changedp to B_TRUE to indicate
360  * to the caller that it should send a MAC_NOTE_LINK notification. In both
361  * cases, it is the responsibility of the caller to invoke notification
362  * functions after releasing the the port lock.
363  */
364 void
365 aggr_grp_port_mac_changed(aggr_grp_t *grp, aggr_port_t *port,
366     boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
367 {
368 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(grp));
369 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
370 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
371 	ASSERT(mac_addr_changedp != NULL);
372 	ASSERT(link_state_changedp != NULL);
373 
374 	*mac_addr_changedp = B_FALSE;
375 	*link_state_changedp = B_FALSE;
376 
377 	if (grp->lg_addr_fixed) {
378 		/*
379 		 * The group is using a fixed MAC address or an automatic
380 		 * MAC address has not been set.
381 		 */
382 		return;
383 	}
384 
385 	if (grp->lg_mac_addr_port == port) {
386 		/*
387 		 * The MAC address of the port was assigned to the group
388 		 * MAC address. Update the group MAC address.
389 		 */
390 		bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
391 		*mac_addr_changedp = B_TRUE;
392 	} else {
393 		/*
394 		 * Update the actual port MAC address to the MAC address
395 		 * of the group.
396 		 */
397 		if (aggr_port_unicst(port, grp->lg_addr) != 0) {
398 			*link_state_changedp = aggr_grp_detach_port(grp, port,
399 			    B_TRUE);
400 		} else {
401 			/*
402 			 * If a port was detached because of a previous
403 			 * failure changing the MAC address, the port is
404 			 * reattached when it successfully changes the MAC
405 			 * address now, and this might cause the link state
406 			 * of the aggregation to change.
407 			 */
408 			*link_state_changedp = aggr_grp_attach_port(grp, port);
409 		}
410 	}
411 }
412 
413 /*
414  * Add a port to a link aggregation group.
415  */
416 static int
417 aggr_grp_add_port(aggr_grp_t *grp, datalink_id_t linkid, boolean_t force,
418     aggr_port_t **pp)
419 {
420 	aggr_port_t *port, **cport;
421 	int err;
422 
423 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(grp));
424 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
425 
426 	/* create new port */
427 	err = aggr_port_create(linkid, force, &port);
428 	if (err != 0)
429 		return (err);
430 
431 	rw_enter(&port->lp_lock, RW_WRITER);
432 
433 	/* add port to list of group constituent ports */
434 	cport = &grp->lg_ports;
435 	while (*cport != NULL)
436 		cport = &((*cport)->lp_next);
437 	*cport = port;
438 
439 	/*
440 	 * Back reference to the group it is member of. A port always
441 	 * holds a reference to its group to ensure that the back
442 	 * reference is always valid.
443 	 */
444 	port->lp_grp = grp;
445 	AGGR_GRP_REFHOLD(grp);
446 	grp->lg_nports++;
447 
448 	aggr_lacp_init_port(port);
449 
450 	/*
451 	 * Initialize the callback functions for this port. Note that this
452 	 * can only be done after the lp_grp field is set.
453 	 */
454 	aggr_port_init_callbacks(port);
455 
456 	rw_exit(&port->lp_lock);
457 
458 	if (pp != NULL)
459 		*pp = port;
460 
461 	return (0);
462 }
463 
464 /*
465  * Add one or more ports to an existing link aggregation group.
466  */
467 int
468 aggr_grp_add_ports(datalink_id_t linkid, uint_t nports, boolean_t force,
469     laioc_port_t *ports)
470 {
471 	int rc, i, nadded = 0;
472 	aggr_grp_t *grp = NULL;
473 	aggr_port_t *port;
474 	boolean_t link_state_changed = B_FALSE;
475 
476 	/* get group corresponding to linkid */
477 	rw_enter(&aggr_grp_lock, RW_READER);
478 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
479 	    (mod_hash_val_t *)&grp) != 0) {
480 		rw_exit(&aggr_grp_lock);
481 		return (ENOENT);
482 	}
483 	AGGR_GRP_REFHOLD(grp);
484 	rw_exit(&aggr_grp_lock);
485 
486 	AGGR_LACP_LOCK_WRITER(grp);
487 	rw_enter(&grp->lg_lock, RW_WRITER);
488 
489 	/* add the specified ports to group */
490 	for (i = 0; i < nports; i++) {
491 		/* add port to group */
492 		if ((rc = aggr_grp_add_port(grp, ports[i].lp_linkid,
493 		    force, &port)) != 0) {
494 			goto bail;
495 		}
496 		ASSERT(port != NULL);
497 		nadded++;
498 
499 		/* check capabilities */
500 		if (!aggr_grp_capab_check(grp, port) ||
501 		    !aggr_grp_sdu_check(grp, port) ||
502 		    !aggr_grp_margin_check(grp, port)) {
503 			rc = ENOTSUP;
504 			goto bail;
505 		}
506 
507 		/* start port if group has already been started */
508 		if (grp->lg_started) {
509 			rw_enter(&port->lp_lock, RW_WRITER);
510 			rc = aggr_port_start(port);
511 			if (rc != 0) {
512 				rw_exit(&port->lp_lock);
513 				goto bail;
514 			}
515 
516 			/* set port promiscuous mode */
517 			rc = aggr_port_promisc(port, grp->lg_promisc);
518 			if (rc != 0) {
519 				rw_exit(&port->lp_lock);
520 				goto bail;
521 			}
522 			rw_exit(&port->lp_lock);
523 		}
524 
525 		/*
526 		 * Attach each port if necessary.
527 		 */
528 		if (aggr_port_notify_link(grp, port, B_FALSE))
529 			link_state_changed = B_TRUE;
530 	}
531 
532 	/* update the MAC address of the constituent ports */
533 	if (aggr_grp_update_ports_mac(grp))
534 		link_state_changed = B_TRUE;
535 
536 	if (link_state_changed)
537 		mac_link_update(grp->lg_mh, grp->lg_link_state);
538 
539 bail:
540 	if (rc != 0) {
541 		/* stop and remove ports that have been added */
542 		for (i = 0; i < nadded && !grp->lg_closing; i++) {
543 			port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
544 			ASSERT(port != NULL);
545 			if (grp->lg_started) {
546 				rw_enter(&port->lp_lock, RW_WRITER);
547 				aggr_port_stop(port);
548 				rw_exit(&port->lp_lock);
549 			}
550 			(void) aggr_grp_rem_port(grp, port, NULL, NULL);
551 		}
552 	}
553 
554 	rw_exit(&grp->lg_lock);
555 	AGGR_LACP_UNLOCK(grp);
556 	if (rc == 0 && !grp->lg_closing)
557 		mac_resource_update(grp->lg_mh);
558 	AGGR_GRP_REFRELE(grp);
559 	return (rc);
560 }
561 
562 /*
563  * Update properties of an existing link aggregation group.
564  */
565 int
566 aggr_grp_modify(datalink_id_t linkid, aggr_grp_t *grp_arg, uint8_t update_mask,
567     uint32_t policy, boolean_t mac_fixed, const uchar_t *mac_addr,
568     aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer)
569 {
570 	int rc = 0;
571 	aggr_grp_t *grp = NULL;
572 	boolean_t mac_addr_changed = B_FALSE;
573 	boolean_t link_state_changed = B_FALSE;
574 
575 	if (grp_arg == NULL) {
576 		/* get group corresponding to linkid */
577 		rw_enter(&aggr_grp_lock, RW_READER);
578 		if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
579 		    (mod_hash_val_t *)&grp) != 0) {
580 			rc = ENOENT;
581 			goto bail;
582 		}
583 		AGGR_LACP_LOCK_WRITER(grp);
584 		rw_enter(&grp->lg_lock, RW_WRITER);
585 	} else {
586 		grp = grp_arg;
587 		ASSERT(AGGR_LACP_LOCK_HELD_WRITER(grp));
588 		ASSERT(RW_WRITE_HELD(&grp->lg_lock));
589 	}
590 
591 	ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock));
592 	AGGR_GRP_REFHOLD(grp);
593 
594 	/* validate fixed address if specified */
595 	if ((update_mask & AGGR_MODIFY_MAC) && mac_fixed &&
596 	    ((bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) ||
597 	    (mac_addr[0] & 0x01))) {
598 		rc = EINVAL;
599 		goto bail;
600 	}
601 
602 	/* update policy if requested */
603 	if (update_mask & AGGR_MODIFY_POLICY)
604 		aggr_send_update_policy(grp, policy);
605 
606 	/* update unicast MAC address if requested */
607 	if (update_mask & AGGR_MODIFY_MAC) {
608 		if (mac_fixed) {
609 			/* user-supplied MAC address */
610 			grp->lg_mac_addr_port = NULL;
611 			if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) != 0) {
612 				bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
613 				mac_addr_changed = B_TRUE;
614 			}
615 		} else if (grp->lg_addr_fixed) {
616 			/* switch from user-supplied to automatic */
617 			aggr_port_t *port = grp->lg_ports;
618 
619 			rw_enter(&port->lp_lock, RW_WRITER);
620 			bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
621 			grp->lg_mac_addr_port = port;
622 			mac_addr_changed = B_TRUE;
623 			rw_exit(&port->lp_lock);
624 		}
625 		grp->lg_addr_fixed = mac_fixed;
626 	}
627 
628 	if (mac_addr_changed)
629 		link_state_changed = aggr_grp_update_ports_mac(grp);
630 
631 	if (update_mask & AGGR_MODIFY_LACP_MODE)
632 		aggr_lacp_update_mode(grp, lacp_mode);
633 
634 	if ((update_mask & AGGR_MODIFY_LACP_TIMER) && !grp->lg_closing)
635 		aggr_lacp_update_timer(grp, lacp_timer);
636 
637 bail:
638 	if (grp != NULL && !grp->lg_closing) {
639 		/*
640 		 * If grp_arg is non-NULL, this function is called from
641 		 * mac_unicst_set(), and the MAC_NOTE_UNICST notification
642 		 * will be sent there.
643 		 */
644 		if ((grp_arg == NULL) && mac_addr_changed)
645 			mac_unicst_update(grp->lg_mh, grp->lg_addr);
646 
647 		if (link_state_changed)
648 			mac_link_update(grp->lg_mh, grp->lg_link_state);
649 
650 	}
651 
652 	if (grp_arg == NULL) {
653 		if (grp != NULL) {
654 			rw_exit(&grp->lg_lock);
655 			AGGR_LACP_UNLOCK(grp);
656 		}
657 		rw_exit(&aggr_grp_lock);
658 	}
659 
660 	if (grp != NULL)
661 		AGGR_GRP_REFRELE(grp);
662 
663 	return (rc);
664 }
665 
666 /*
667  * Create a new link aggregation group upon request from administrator.
668  * Returns 0 on success, an errno on failure.
669  */
670 int
671 aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports,
672     laioc_port_t *ports, uint32_t policy, boolean_t mac_fixed, boolean_t force,
673     uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer)
674 {
675 	aggr_grp_t *grp = NULL;
676 	aggr_port_t *port;
677 	mac_register_t *mac;
678 	boolean_t link_state_changed;
679 	int err;
680 	int i;
681 
682 	/* need at least one port */
683 	if (nports == 0)
684 		return (EINVAL);
685 
686 	rw_enter(&aggr_grp_lock, RW_WRITER);
687 
688 	/* does a group with the same linkid already exist? */
689 	err = mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
690 	    (mod_hash_val_t *)&grp);
691 	if (err == 0) {
692 		rw_exit(&aggr_grp_lock);
693 		return (EEXIST);
694 	}
695 
696 	grp = kmem_cache_alloc(aggr_grp_cache, KM_SLEEP);
697 
698 	AGGR_LACP_LOCK_WRITER(grp);
699 	rw_enter(&grp->lg_lock, RW_WRITER);
700 
701 	grp->lg_refs = 1;
702 	grp->lg_closing = B_FALSE;
703 	grp->lg_force = force;
704 	grp->lg_linkid = linkid;
705 	grp->lg_ifspeed = 0;
706 	grp->lg_link_state = LINK_STATE_UNKNOWN;
707 	grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
708 	grp->lg_started = B_FALSE;
709 	grp->lg_promisc = B_FALSE;
710 	aggr_lacp_init_grp(grp);
711 
712 	/* add MAC ports to group */
713 	grp->lg_ports = NULL;
714 	grp->lg_nports = 0;
715 	grp->lg_nattached_ports = 0;
716 	grp->lg_ntx_ports = 0;
717 
718 	/*
719 	 * If key is not specified by the user, allocate the key.
720 	 */
721 	if ((key == 0) && ((key = (uint32_t)id_alloc(key_ids)) == 0)) {
722 		err = ENOMEM;
723 		goto bail;
724 	}
725 	grp->lg_key = key;
726 	grp->lg_mcst_list = NULL;
727 
728 	for (i = 0; i < nports; i++) {
729 		err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, NULL);
730 		if (err != 0)
731 			goto bail;
732 	}
733 
734 	/*
735 	 * If no explicit MAC address was specified by the administrator,
736 	 * set it to the MAC address of the first port.
737 	 */
738 	grp->lg_addr_fixed = mac_fixed;
739 	if (grp->lg_addr_fixed) {
740 		/* validate specified address */
741 		if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) {
742 			err = EINVAL;
743 			goto bail;
744 		}
745 		bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
746 	} else {
747 		bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
748 		grp->lg_mac_addr_port = grp->lg_ports;
749 	}
750 
751 	/*
752 	 * Update the MAC address of the constituent ports.
753 	 * None of the port is attached at this time, the link state of the
754 	 * aggregation will not change.
755 	 */
756 	link_state_changed = aggr_grp_update_ports_mac(grp);
757 	ASSERT(!link_state_changed);
758 
759 	/* update outbound load balancing policy */
760 	aggr_send_update_policy(grp, policy);
761 
762 	/* set the initial group capabilities */
763 	aggr_grp_capab_set(grp);
764 
765 	if ((mac = mac_alloc(MAC_VERSION)) == NULL) {
766 		err = ENOMEM;
767 		goto bail;
768 	}
769 	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
770 	mac->m_driver = grp;
771 	mac->m_dip = aggr_dip;
772 	mac->m_instance = grp->lg_key > AGGR_MAX_KEY ? (uint_t)-1 : grp->lg_key;
773 	mac->m_src_addr = grp->lg_addr;
774 	mac->m_callbacks = &aggr_m_callbacks;
775 	mac->m_min_sdu = 0;
776 	mac->m_max_sdu = grp->lg_max_sdu = aggr_grp_max_sdu(grp);
777 	mac->m_margin = aggr_grp_max_margin(grp);
778 	err = mac_register(mac, &grp->lg_mh);
779 	mac_free(mac);
780 	if (err != 0)
781 		goto bail;
782 
783 	if ((err = dls_devnet_create(grp->lg_mh, grp->lg_linkid)) != 0) {
784 		(void) mac_unregister(grp->lg_mh);
785 		goto bail;
786 	}
787 
788 	/* set LACP mode */
789 	aggr_lacp_set_mode(grp, lacp_mode, lacp_timer);
790 
791 	/*
792 	 * Attach each port if necessary.
793 	 */
794 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
795 		if (aggr_port_notify_link(grp, port, B_FALSE))
796 			link_state_changed = B_TRUE;
797 	}
798 
799 	if (link_state_changed)
800 		mac_link_update(grp->lg_mh, grp->lg_link_state);
801 
802 	/* add new group to hash table */
803 	err = mod_hash_insert(aggr_grp_hash, GRP_HASH_KEY(linkid),
804 	    (mod_hash_val_t)grp);
805 	ASSERT(err == 0);
806 	aggr_grp_cnt++;
807 
808 	rw_exit(&grp->lg_lock);
809 	AGGR_LACP_UNLOCK(grp);
810 	rw_exit(&aggr_grp_lock);
811 	return (0);
812 
813 bail:
814 	if (grp != NULL) {
815 		aggr_port_t *cport;
816 
817 		grp->lg_closing = B_TRUE;
818 
819 		port = grp->lg_ports;
820 		while (port != NULL) {
821 			cport = port->lp_next;
822 			aggr_port_delete(port);
823 			port = cport;
824 		}
825 
826 		rw_exit(&grp->lg_lock);
827 		AGGR_LACP_UNLOCK(grp);
828 
829 		AGGR_GRP_REFRELE(grp);
830 	}
831 
832 	rw_exit(&aggr_grp_lock);
833 	return (err);
834 }
835 
836 /*
837  * Return a pointer to the member of a group with specified linkid.
838  */
839 static aggr_port_t *
840 aggr_grp_port_lookup(aggr_grp_t *grp, datalink_id_t linkid)
841 {
842 	aggr_port_t *port;
843 
844 	ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock));
845 
846 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
847 		if (port->lp_linkid == linkid)
848 			break;
849 	}
850 
851 	return (port);
852 }
853 
854 /*
855  * Stop, detach and remove a port from a link aggregation group.
856  */
857 static int
858 aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port,
859     boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
860 {
861 	int rc = 0;
862 	aggr_port_t **pport;
863 	boolean_t mac_addr_changed = B_FALSE;
864 	boolean_t link_state_changed = B_FALSE;
865 	uint64_t val;
866 	uint_t i;
867 	uint_t stat;
868 
869 	ASSERT(AGGR_LACP_LOCK_HELD_WRITER(grp));
870 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
871 	ASSERT(grp->lg_nports > 1);
872 	ASSERT(!grp->lg_closing);
873 
874 	/* unlink port */
875 	for (pport = &grp->lg_ports; *pport != port;
876 	    pport = &(*pport)->lp_next) {
877 		if (*pport == NULL) {
878 			rc = ENOENT;
879 			goto done;
880 		}
881 	}
882 	*pport = port->lp_next;
883 
884 	atomic_add_32(&port->lp_closing, 1);
885 
886 	rw_enter(&port->lp_lock, RW_WRITER);
887 
888 	/*
889 	 * If the MAC address of the port being removed was assigned
890 	 * to the group, update the group MAC address
891 	 * using the MAC address of a different port.
892 	 */
893 	if (!grp->lg_addr_fixed && grp->lg_mac_addr_port == port) {
894 		/*
895 		 * Set the MAC address of the group to the
896 		 * MAC address of its first port.
897 		 */
898 		bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
899 		grp->lg_mac_addr_port = grp->lg_ports;
900 		mac_addr_changed = B_TRUE;
901 	}
902 
903 	link_state_changed = aggr_grp_detach_port(grp, port, B_FALSE);
904 
905 	/*
906 	 * Add the counter statistics of the ports while it was aggregated
907 	 * to the group's residual statistics.  This is done by obtaining
908 	 * the current counter from the underlying MAC then subtracting the
909 	 * value of the counter at the moment it was added to the
910 	 * aggregation.
911 	 */
912 	for (i = 0; i < MAC_NSTAT && !grp->lg_closing; i++) {
913 		stat = i + MAC_STAT_MIN;
914 		if (!MAC_STAT_ISACOUNTER(stat))
915 			continue;
916 		val = aggr_port_stat(port, stat);
917 		val -= port->lp_stat[i];
918 		grp->lg_stat[i] += val;
919 	}
920 	for (i = 0; i < ETHER_NSTAT && !grp->lg_closing; i++) {
921 		stat = i + MACTYPE_STAT_MIN;
922 		if (!ETHER_STAT_ISACOUNTER(stat))
923 			continue;
924 		val = aggr_port_stat(port, stat);
925 		val -= port->lp_ether_stat[i];
926 		grp->lg_ether_stat[i] += val;
927 	}
928 
929 	grp->lg_nports--;
930 
931 	rw_exit(&port->lp_lock);
932 
933 	aggr_port_delete(port);
934 
935 	/*
936 	 * If the group MAC address has changed, update the MAC address of
937 	 * the remaining constituent ports according to the new MAC
938 	 * address of the group.
939 	 */
940 	if (mac_addr_changed && aggr_grp_update_ports_mac(grp))
941 		link_state_changed = B_TRUE;
942 
943 done:
944 	if (mac_addr_changedp != NULL)
945 		*mac_addr_changedp = mac_addr_changed;
946 	if (link_state_changedp != NULL)
947 		*link_state_changedp = link_state_changed;
948 
949 	return (rc);
950 }
951 
952 /*
953  * Remove one or more ports from an existing link aggregation group.
954  */
955 int
956 aggr_grp_rem_ports(datalink_id_t linkid, uint_t nports, laioc_port_t *ports)
957 {
958 	int rc = 0, i;
959 	aggr_grp_t *grp = NULL;
960 	aggr_port_t *port;
961 	boolean_t mac_addr_update = B_FALSE, mac_addr_changed;
962 	boolean_t link_state_update = B_FALSE, link_state_changed;
963 
964 	/* get group corresponding to linkid */
965 	rw_enter(&aggr_grp_lock, RW_READER);
966 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
967 	    (mod_hash_val_t *)&grp) != 0) {
968 		rw_exit(&aggr_grp_lock);
969 		return (ENOENT);
970 	}
971 	AGGR_GRP_REFHOLD(grp);
972 	rw_exit(&aggr_grp_lock);
973 
974 	AGGR_LACP_LOCK_WRITER(grp);
975 	rw_enter(&grp->lg_lock, RW_WRITER);
976 
977 	/* we need to keep at least one port per group */
978 	if (nports >= grp->lg_nports) {
979 		rc = EINVAL;
980 		goto bail;
981 	}
982 
983 	/* first verify that all the groups are valid */
984 	for (i = 0; i < nports; i++) {
985 		if (aggr_grp_port_lookup(grp, ports[i].lp_linkid) == NULL) {
986 			/* port not found */
987 			rc = ENOENT;
988 			goto bail;
989 		}
990 	}
991 
992 	/* remove the specified ports from group */
993 	for (i = 0; i < nports && !grp->lg_closing; i++) {
994 		/* lookup port */
995 		port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
996 		ASSERT(port != NULL);
997 
998 		/* stop port if group has already been started */
999 		if (grp->lg_started) {
1000 			rw_enter(&port->lp_lock, RW_WRITER);
1001 			aggr_lacp_port_detached(port);
1002 			aggr_port_stop(port);
1003 			rw_exit(&port->lp_lock);
1004 		}
1005 
1006 		/* remove port from group */
1007 		rc = aggr_grp_rem_port(grp, port, &mac_addr_changed,
1008 		    &link_state_changed);
1009 		ASSERT(rc == 0);
1010 		mac_addr_update = mac_addr_update || mac_addr_changed;
1011 		link_state_update = link_state_update || link_state_changed;
1012 	}
1013 
1014 bail:
1015 	rw_exit(&grp->lg_lock);
1016 	AGGR_LACP_UNLOCK(grp);
1017 	if (!grp->lg_closing) {
1018 		if (mac_addr_update)
1019 			mac_unicst_update(grp->lg_mh, grp->lg_addr);
1020 		if (link_state_update)
1021 			mac_link_update(grp->lg_mh, grp->lg_link_state);
1022 		if (rc == 0)
1023 			mac_resource_update(grp->lg_mh);
1024 	}
1025 	AGGR_GRP_REFRELE(grp);
1026 
1027 	return (rc);
1028 }
1029 
1030 int
1031 aggr_grp_delete(datalink_id_t linkid)
1032 {
1033 	aggr_grp_t *grp = NULL;
1034 	aggr_port_t *port, *cport;
1035 	lg_mcst_addr_t *mcst, *mcst_nextp;
1036 	datalink_id_t tmpid;
1037 	mod_hash_val_t val;
1038 	int err;
1039 
1040 	rw_enter(&aggr_grp_lock, RW_WRITER);
1041 
1042 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1043 	    (mod_hash_val_t *)&grp) != 0) {
1044 		rw_exit(&aggr_grp_lock);
1045 		return (ENOENT);
1046 	}
1047 
1048 	/*
1049 	 * Note that dls_devnet_destroy() must be called before lg_lock is
1050 	 * held. Otherwise, it will deadlock if another thread is in
1051 	 * aggr_m_stat() and thus has a kstat_hold() on the kstats that
1052 	 * dls_devnet_destroy() needs to delete.
1053 	 */
1054 	if ((err = dls_devnet_destroy(grp->lg_mh, &tmpid)) != 0) {
1055 		rw_exit(&aggr_grp_lock);
1056 		return (err);
1057 	}
1058 	ASSERT(linkid == tmpid);
1059 
1060 	AGGR_LACP_LOCK_WRITER(grp);
1061 	rw_enter(&grp->lg_lock, RW_WRITER);
1062 
1063 	/*
1064 	 * Unregister from the MAC service module. Since this can
1065 	 * fail if a client hasn't closed the MAC port, we gracefully
1066 	 * fail the operation.
1067 	 */
1068 	grp->lg_closing = B_TRUE;
1069 	if ((err = mac_disable(grp->lg_mh)) != 0) {
1070 		grp->lg_closing = B_FALSE;
1071 		rw_exit(&grp->lg_lock);
1072 		AGGR_LACP_UNLOCK(grp);
1073 
1074 		(void) dls_devnet_create(grp->lg_mh, linkid);
1075 		rw_exit(&aggr_grp_lock);
1076 		return (err);
1077 	}
1078 
1079 	/*
1080 	 * Free the list of multicast addresses.
1081 	 */
1082 	for (mcst = grp->lg_mcst_list; mcst != NULL; mcst = mcst_nextp) {
1083 		mcst_nextp = mcst->lg_mcst_nextp;
1084 		kmem_free(mcst, sizeof (lg_mcst_addr_t));
1085 	}
1086 	grp->lg_mcst_list = NULL;
1087 
1088 	/* detach and free MAC ports associated with group */
1089 	port = grp->lg_ports;
1090 	while (port != NULL) {
1091 		cport = port->lp_next;
1092 		rw_enter(&port->lp_lock, RW_WRITER);
1093 		aggr_lacp_port_detached(port);
1094 		if (grp->lg_started)
1095 			aggr_port_stop(port);
1096 		(void) aggr_grp_detach_port(grp, port, B_FALSE);
1097 		rw_exit(&port->lp_lock);
1098 		aggr_port_delete(port);
1099 		port = cport;
1100 	}
1101 
1102 	VERIFY(mac_unregister(grp->lg_mh) == 0);
1103 
1104 	rw_exit(&grp->lg_lock);
1105 	AGGR_LACP_UNLOCK(grp);
1106 
1107 	(void) mod_hash_remove(aggr_grp_hash, GRP_HASH_KEY(linkid), &val);
1108 	ASSERT(grp == (aggr_grp_t *)val);
1109 
1110 	ASSERT(aggr_grp_cnt > 0);
1111 	aggr_grp_cnt--;
1112 
1113 	rw_exit(&aggr_grp_lock);
1114 	AGGR_GRP_REFRELE(grp);
1115 
1116 	return (0);
1117 }
1118 
1119 void
1120 aggr_grp_free(aggr_grp_t *grp)
1121 {
1122 	ASSERT(grp->lg_refs == 0);
1123 	if (grp->lg_key > AGGR_MAX_KEY) {
1124 		id_free(key_ids, grp->lg_key);
1125 		grp->lg_key = 0;
1126 	}
1127 	kmem_cache_free(aggr_grp_cache, grp);
1128 }
1129 
1130 int
1131 aggr_grp_info(datalink_id_t linkid, void *fn_arg,
1132     aggr_grp_info_new_grp_fn_t new_grp_fn,
1133     aggr_grp_info_new_port_fn_t new_port_fn)
1134 {
1135 	aggr_grp_t	*grp;
1136 	aggr_port_t	*port;
1137 	int		rc = 0;
1138 
1139 	rw_enter(&aggr_grp_lock, RW_READER);
1140 
1141 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1142 	    (mod_hash_val_t *)&grp) != 0) {
1143 		rw_exit(&aggr_grp_lock);
1144 		return (ENOENT);
1145 	}
1146 
1147 	rw_enter(&grp->lg_lock, RW_READER);
1148 
1149 	rc = new_grp_fn(fn_arg, grp->lg_linkid,
1150 	    (grp->lg_key > AGGR_MAX_KEY) ? 0 : grp->lg_key, grp->lg_addr,
1151 	    grp->lg_addr_fixed, grp->lg_force, grp->lg_tx_policy,
1152 	    grp->lg_nports, grp->lg_lacp_mode, grp->aggr.PeriodicTimer);
1153 
1154 	if (rc != 0)
1155 		goto bail;
1156 
1157 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1158 		rw_enter(&port->lp_lock, RW_READER);
1159 		rc = new_port_fn(fn_arg, port->lp_linkid, port->lp_addr,
1160 		    port->lp_state, &port->lp_lacp.ActorOperPortState);
1161 		rw_exit(&port->lp_lock);
1162 
1163 		if (rc != 0)
1164 			goto bail;
1165 	}
1166 
1167 bail:
1168 	rw_exit(&grp->lg_lock);
1169 	rw_exit(&aggr_grp_lock);
1170 	return (rc);
1171 }
1172 
1173 static void
1174 aggr_m_resources(void *arg)
1175 {
1176 	aggr_grp_t *grp = arg;
1177 	aggr_port_t *port;
1178 
1179 	/* Call each port's m_resources function */
1180 	for (port = grp->lg_ports; port != NULL; port = port->lp_next)
1181 		mac_resources(port->lp_mh);
1182 }
1183 
1184 /*ARGSUSED*/
1185 static void
1186 aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
1187 {
1188 	miocnak(q, mp, 0, ENOTSUP);
1189 }
1190 
1191 static int
1192 aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val)
1193 {
1194 	aggr_port_t	*port;
1195 	uint_t		stat_index;
1196 
1197 	/* We only aggregate counter statistics. */
1198 	if (IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat) ||
1199 	    IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat)) {
1200 		return (ENOTSUP);
1201 	}
1202 
1203 	/*
1204 	 * Counter statistics for a group are computed by aggregating the
1205 	 * counters of the members MACs while they were aggregated, plus
1206 	 * the residual counter of the group itself, which is updated each
1207 	 * time a MAC is removed from the group.
1208 	 */
1209 	*val = 0;
1210 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1211 		/* actual port statistic */
1212 		*val += aggr_port_stat(port, stat);
1213 		/*
1214 		 * minus the port stat when it was added, plus any residual
1215 		 * amount for the group.
1216 		 */
1217 		if (IS_MAC_STAT(stat)) {
1218 			stat_index = stat - MAC_STAT_MIN;
1219 			*val -= port->lp_stat[stat_index];
1220 			*val += grp->lg_stat[stat_index];
1221 		} else if (IS_MACTYPE_STAT(stat)) {
1222 			stat_index = stat - MACTYPE_STAT_MIN;
1223 			*val -= port->lp_ether_stat[stat_index];
1224 			*val += grp->lg_ether_stat[stat_index];
1225 		}
1226 	}
1227 	return (0);
1228 }
1229 
1230 static int
1231 aggr_m_stat(void *arg, uint_t stat, uint64_t *val)
1232 {
1233 	aggr_grp_t	*grp = arg;
1234 	int		rval = 0;
1235 
1236 	rw_enter(&grp->lg_lock, RW_READER);
1237 
1238 	switch (stat) {
1239 	case MAC_STAT_IFSPEED:
1240 		*val = grp->lg_ifspeed;
1241 		break;
1242 
1243 	case ETHER_STAT_LINK_DUPLEX:
1244 		*val = grp->lg_link_duplex;
1245 		break;
1246 
1247 	default:
1248 		/*
1249 		 * For all other statistics, we return the aggregated stat
1250 		 * from the underlying ports.  aggr_grp_stat() will set
1251 		 * rval appropriately if the statistic isn't a counter.
1252 		 */
1253 		rval = aggr_grp_stat(grp, stat, val);
1254 	}
1255 
1256 	rw_exit(&grp->lg_lock);
1257 	return (rval);
1258 }
1259 
1260 static int
1261 aggr_m_start(void *arg)
1262 {
1263 	aggr_grp_t *grp = arg;
1264 	aggr_port_t *port;
1265 
1266 	AGGR_LACP_LOCK_WRITER(grp);
1267 	rw_enter(&grp->lg_lock, RW_WRITER);
1268 
1269 	/*
1270 	 * Attempts to start all configured members of the group.
1271 	 * Group members will be attached when their link-up notification
1272 	 * is received.
1273 	 */
1274 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1275 		rw_enter(&port->lp_lock, RW_WRITER);
1276 		if (aggr_port_start(port) != 0) {
1277 			rw_exit(&port->lp_lock);
1278 			continue;
1279 		}
1280 
1281 		/* set port promiscuous mode */
1282 		if (aggr_port_promisc(port, grp->lg_promisc) != 0)
1283 			aggr_port_stop(port);
1284 		rw_exit(&port->lp_lock);
1285 	}
1286 
1287 	grp->lg_started = B_TRUE;
1288 
1289 	rw_exit(&grp->lg_lock);
1290 	AGGR_LACP_UNLOCK(grp);
1291 
1292 	return (0);
1293 }
1294 
1295 static void
1296 aggr_m_stop(void *arg)
1297 {
1298 	aggr_grp_t *grp = arg;
1299 	aggr_port_t *port;
1300 
1301 	AGGR_LACP_LOCK_WRITER(grp);
1302 	rw_enter(&grp->lg_lock, RW_WRITER);
1303 
1304 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1305 		rw_enter(&port->lp_lock, RW_WRITER);
1306 		aggr_lacp_port_detached(port);
1307 		aggr_port_stop(port);
1308 		rw_exit(&port->lp_lock);
1309 	}
1310 
1311 	grp->lg_started = B_FALSE;
1312 
1313 	rw_exit(&grp->lg_lock);
1314 	AGGR_LACP_UNLOCK(grp);
1315 }
1316 
1317 static int
1318 aggr_m_promisc(void *arg, boolean_t on)
1319 {
1320 	aggr_grp_t *grp = arg;
1321 	aggr_port_t *port;
1322 	boolean_t link_state_changed = B_FALSE;
1323 
1324 	AGGR_LACP_LOCK_WRITER(grp);
1325 	rw_enter(&grp->lg_lock, RW_WRITER);
1326 	AGGR_GRP_REFHOLD(grp);
1327 
1328 	ASSERT(!grp->lg_closing);
1329 
1330 	if (on == grp->lg_promisc)
1331 		goto bail;
1332 
1333 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1334 		rw_enter(&port->lp_lock, RW_WRITER);
1335 		AGGR_PORT_REFHOLD(port);
1336 		if (port->lp_started) {
1337 			if (aggr_port_promisc(port, on) != 0) {
1338 				if (aggr_grp_detach_port(grp, port, B_TRUE))
1339 					link_state_changed = B_TRUE;
1340 			} else {
1341 				/*
1342 				 * If a port was detached because of a previous
1343 				 * failure changing the promiscuity, the port
1344 				 * is reattached when it successfully changes
1345 				 * the promiscuity now, and this might cause
1346 				 * the link state of the aggregation to change.
1347 				 */
1348 				if (aggr_grp_attach_port(grp, port))
1349 					link_state_changed = B_TRUE;
1350 			}
1351 		}
1352 		rw_exit(&port->lp_lock);
1353 		AGGR_PORT_REFRELE(port);
1354 	}
1355 
1356 	grp->lg_promisc = on;
1357 
1358 	if (link_state_changed)
1359 		mac_link_update(grp->lg_mh, grp->lg_link_state);
1360 
1361 bail:
1362 	rw_exit(&grp->lg_lock);
1363 	AGGR_LACP_UNLOCK(grp);
1364 	AGGR_GRP_REFRELE(grp);
1365 
1366 	return (0);
1367 }
1368 
1369 /*
1370  * Initialize the capabilities that are advertised for the group
1371  * according to the capabilities of the constituent ports.
1372  */
1373 static boolean_t
1374 aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
1375 {
1376 	aggr_grp_t *grp = arg;
1377 
1378 	switch (cap) {
1379 	case MAC_CAPAB_HCKSUM: {
1380 		uint32_t *hcksum_txflags = cap_data;
1381 		*hcksum_txflags = grp->lg_hcksum_txflags;
1382 		break;
1383 	}
1384 	case MAC_CAPAB_POLL:
1385 		/*
1386 		 * There's nothing for us to fill in, we simply return
1387 		 * B_TRUE or B_FALSE to represent the group's support
1388 		 * status for this capability.
1389 		 */
1390 		return (grp->lg_gldv3_polling);
1391 	case MAC_CAPAB_NO_NATIVEVLAN:
1392 		return (!grp->lg_vlan);
1393 	case MAC_CAPAB_NO_ZCOPY:
1394 		return (!grp->lg_zcopy);
1395 	default:
1396 		return (B_FALSE);
1397 	}
1398 	return (B_TRUE);
1399 }
1400 
1401 static int
1402 aggr_grp_multicst(aggr_grp_t *grp, boolean_t add, const uint8_t *addrp)
1403 {
1404 	lg_mcst_addr_t	*mcst, **ppmcst;
1405 
1406 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
1407 
1408 	for (ppmcst = &(grp->lg_mcst_list); (mcst = *ppmcst) != NULL;
1409 	    ppmcst = &(mcst->lg_mcst_nextp)) {
1410 		if (bcmp(mcst->lg_mcst_addr, addrp, MAXMACADDRLEN) == 0)
1411 			break;
1412 	}
1413 
1414 	if (add) {
1415 		if (mcst != NULL)
1416 			return (0);
1417 		mcst = kmem_zalloc(sizeof (lg_mcst_addr_t), KM_NOSLEEP);
1418 		if (mcst == NULL)
1419 			return (ENOMEM);
1420 		bcopy(addrp, mcst->lg_mcst_addr, MAXMACADDRLEN);
1421 		*ppmcst = mcst;
1422 	} else {
1423 		if (mcst == NULL)
1424 			return (ENOENT);
1425 		*ppmcst = mcst->lg_mcst_nextp;
1426 		kmem_free(mcst, sizeof (lg_mcst_addr_t));
1427 	}
1428 	return (0);
1429 }
1430 
1431 /*
1432  * Add or remove the multicast addresses that are defined for the group
1433  * to or from the specified port.
1434  * This function is called before stopping a port, before a port
1435  * is detached from a group, and when attaching a port to a group.
1436  */
1437 void
1438 aggr_grp_multicst_port(aggr_port_t *port, boolean_t add)
1439 {
1440 	aggr_grp_t *grp = port->lp_grp;
1441 	lg_mcst_addr_t	*mcst;
1442 
1443 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
1444 	ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock));
1445 
1446 	if (!port->lp_started)
1447 		return;
1448 
1449 	for (mcst = grp->lg_mcst_list; mcst != NULL;
1450 	    mcst = mcst->lg_mcst_nextp)
1451 		(void) aggr_port_multicst(port, add, mcst->lg_mcst_addr);
1452 }
1453 
1454 static int
1455 aggr_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
1456 {
1457 	aggr_grp_t *grp = arg;
1458 	aggr_port_t *port = NULL;
1459 	int err = 0, cerr;
1460 
1461 	rw_enter(&grp->lg_lock, RW_WRITER);
1462 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1463 		if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
1464 			continue;
1465 		cerr = aggr_port_multicst(port, add, addrp);
1466 		if (cerr == 0)
1467 			(void) aggr_grp_multicst(grp, add, addrp);
1468 		if (cerr != 0 && err == 0)
1469 			err = cerr;
1470 	}
1471 	rw_exit(&grp->lg_lock);
1472 	return (err);
1473 }
1474 
1475 static int
1476 aggr_m_unicst(void *arg, const uint8_t *macaddr)
1477 {
1478 	aggr_grp_t *grp = arg;
1479 	int rc;
1480 
1481 	AGGR_LACP_LOCK_WRITER(grp);
1482 	rw_enter(&grp->lg_lock, RW_WRITER);
1483 	rc = aggr_grp_modify(0, grp, AGGR_MODIFY_MAC, 0, B_TRUE, macaddr,
1484 	    0, 0);
1485 	rw_exit(&grp->lg_lock);
1486 	AGGR_LACP_UNLOCK(grp);
1487 
1488 	return (rc);
1489 }
1490 
1491 /*
1492  * Initialize the capabilities that are advertised for the group
1493  * according to the capabilities of the constituent ports.
1494  */
1495 static void
1496 aggr_grp_capab_set(aggr_grp_t *grp)
1497 {
1498 	uint32_t cksum;
1499 	aggr_port_t *port;
1500 
1501 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
1502 	ASSERT(grp->lg_ports != NULL);
1503 
1504 	grp->lg_hcksum_txflags = (uint32_t)-1;
1505 	grp->lg_gldv3_polling = B_TRUE;
1506 	grp->lg_zcopy = B_TRUE;
1507 	grp->lg_vlan = B_TRUE;
1508 
1509 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1510 		if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &cksum))
1511 			cksum = 0;
1512 		grp->lg_hcksum_txflags &= cksum;
1513 
1514 		grp->lg_vlan &=
1515 		    !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL);
1516 
1517 		grp->lg_zcopy &=
1518 		    !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL);
1519 
1520 		grp->lg_gldv3_polling &=
1521 		    mac_capab_get(port->lp_mh, MAC_CAPAB_POLL, NULL);
1522 	}
1523 }
1524 
1525 /*
1526  * Checks whether the capabilities of the port being added are compatible
1527  * with the current capabilities of the aggregation.
1528  */
1529 static boolean_t
1530 aggr_grp_capab_check(aggr_grp_t *grp, aggr_port_t *port)
1531 {
1532 	uint32_t hcksum_txflags;
1533 
1534 	ASSERT(grp->lg_ports != NULL);
1535 
1536 	if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL)) &
1537 	    grp->lg_vlan) != grp->lg_vlan) {
1538 		return (B_FALSE);
1539 	}
1540 
1541 	if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL)) &
1542 	    grp->lg_zcopy) != grp->lg_zcopy) {
1543 		return (B_FALSE);
1544 	}
1545 
1546 	if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &hcksum_txflags)) {
1547 		if (grp->lg_hcksum_txflags != 0)
1548 			return (B_FALSE);
1549 	} else if ((hcksum_txflags & grp->lg_hcksum_txflags) !=
1550 	    grp->lg_hcksum_txflags) {
1551 		return (B_FALSE);
1552 	}
1553 
1554 	if (mac_capab_get(port->lp_mh, MAC_CAPAB_POLL, NULL) !=
1555 	    grp->lg_gldv3_polling) {
1556 		return (B_FALSE);
1557 	}
1558 
1559 	return (B_TRUE);
1560 }
1561 
1562 /*
1563  * Returns the maximum SDU according to the SDU of the constituent ports.
1564  */
1565 static uint_t
1566 aggr_grp_max_sdu(aggr_grp_t *grp)
1567 {
1568 	uint_t max_sdu = (uint_t)-1;
1569 	aggr_port_t *port;
1570 
1571 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
1572 	ASSERT(grp->lg_ports != NULL);
1573 
1574 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1575 		uint_t port_sdu_max;
1576 
1577 		mac_sdu_get(port->lp_mh, NULL, &port_sdu_max);
1578 		if (max_sdu > port_sdu_max)
1579 			max_sdu = port_sdu_max;
1580 	}
1581 
1582 	return (max_sdu);
1583 }
1584 
1585 /*
1586  * Checks if the maximum SDU of the specified port is compatible
1587  * with the maximum SDU of the specified aggregation group, returns
1588  * B_TRUE if it is, B_FALSE otherwise.
1589  */
1590 static boolean_t
1591 aggr_grp_sdu_check(aggr_grp_t *grp, aggr_port_t *port)
1592 {
1593 	uint_t port_sdu_max;
1594 
1595 	mac_sdu_get(port->lp_mh, NULL, &port_sdu_max);
1596 	return (port_sdu_max >= grp->lg_max_sdu);
1597 }
1598 
1599 /*
1600  * Returns the maximum margin according to the margin of the constituent ports.
1601  */
1602 static uint32_t
1603 aggr_grp_max_margin(aggr_grp_t *grp)
1604 {
1605 	uint32_t margin = UINT32_MAX;
1606 	aggr_port_t *port;
1607 
1608 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
1609 	ASSERT(grp->lg_ports != NULL);
1610 
1611 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1612 		if (margin > port->lp_margin)
1613 			margin = port->lp_margin;
1614 	}
1615 
1616 	grp->lg_margin = margin;
1617 	return (margin);
1618 }
1619 
1620 /*
1621  * Checks if the maximum margin of the specified port is compatible
1622  * with the maximum margin of the specified aggregation group, returns
1623  * B_TRUE if it is, B_FALSE otherwise.
1624  */
1625 static boolean_t
1626 aggr_grp_margin_check(aggr_grp_t *grp, aggr_port_t *port)
1627 {
1628 	if (port->lp_margin >= grp->lg_margin)
1629 		return (B_TRUE);
1630 
1631 	/*
1632 	 * See whether the current margin value is allowed to be changed to
1633 	 * the new value.
1634 	 */
1635 	if (!mac_margin_update(grp->lg_mh, port->lp_margin))
1636 		return (B_FALSE);
1637 
1638 	grp->lg_margin = port->lp_margin;
1639 	return (B_TRUE);
1640 }
1641