xref: /titanic_52/usr/src/uts/common/io/aggr/aggr_grp.c (revision c1ecd8b9404ee0d96d93f02e82c441b9bb149a3d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups.
30  *
31  * An instance of the structure aggr_grp_t is allocated for each
32  * link aggregation group. When created, aggr_grp_t objects are
33  * entered into the aggr_grp_hash hash table maintained by the modhash
34  * module. The hash key is the linkid associated with the link
35  * aggregation group.
36  *
37  * A set of MAC ports is associated with each link aggregation group.
38  */
39 
40 #include <sys/types.h>
41 #include <sys/sysmacros.h>
42 #include <sys/conf.h>
43 #include <sys/cmn_err.h>
44 #include <sys/list.h>
45 #include <sys/ksynch.h>
46 #include <sys/kmem.h>
47 #include <sys/stream.h>
48 #include <sys/modctl.h>
49 #include <sys/ddi.h>
50 #include <sys/sunddi.h>
51 #include <sys/atomic.h>
52 #include <sys/stat.h>
53 #include <sys/modhash.h>
54 #include <sys/id_space.h>
55 #include <sys/strsun.h>
56 #include <sys/dlpi.h>
57 #include <sys/dls.h>
58 #include <sys/vlan.h>
59 #include <sys/aggr.h>
60 #include <sys/aggr_impl.h>
61 
62 static int aggr_m_start(void *);
63 static void aggr_m_stop(void *);
64 static int aggr_m_promisc(void *, boolean_t);
65 static int aggr_m_multicst(void *, boolean_t, const uint8_t *);
66 static int aggr_m_unicst(void *, const uint8_t *);
67 static int aggr_m_stat(void *, uint_t, uint64_t *);
68 static void aggr_m_resources(void *);
69 static void aggr_m_ioctl(void *, queue_t *, mblk_t *);
70 static boolean_t aggr_m_capab_get(void *, mac_capab_t, void *);
71 static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, datalink_id_t);
72 static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *,
73     boolean_t *);
74 
75 static void aggr_grp_capab_set(aggr_grp_t *);
76 static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *);
77 static uint_t aggr_grp_max_sdu(aggr_grp_t *);
78 static uint32_t aggr_grp_max_margin(aggr_grp_t *);
79 static boolean_t aggr_grp_sdu_check(aggr_grp_t *, aggr_port_t *);
80 static boolean_t aggr_grp_margin_check(aggr_grp_t *, aggr_port_t *);
81 static int aggr_grp_multicst(aggr_grp_t *grp, boolean_t add,
82     const uint8_t *addrp);
83 
84 static kmem_cache_t	*aggr_grp_cache;
85 static mod_hash_t	*aggr_grp_hash;
86 static krwlock_t	aggr_grp_lock;
87 static uint_t		aggr_grp_cnt;
88 static id_space_t	*key_ids;
89 
90 #define	GRP_HASHSZ		64
91 #define	GRP_HASH_KEY(linkid)	((mod_hash_key_t)(uintptr_t)linkid)
92 
93 static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0};
94 
95 #define	AGGR_M_CALLBACK_FLAGS	(MC_RESOURCES | MC_IOCTL | MC_GETCAPAB)
96 
97 static mac_callbacks_t aggr_m_callbacks = {
98 	AGGR_M_CALLBACK_FLAGS,
99 	aggr_m_stat,
100 	aggr_m_start,
101 	aggr_m_stop,
102 	aggr_m_promisc,
103 	aggr_m_multicst,
104 	aggr_m_unicst,
105 	aggr_m_tx,
106 	aggr_m_resources,
107 	aggr_m_ioctl,
108 	aggr_m_capab_get
109 };
110 
111 /*ARGSUSED*/
112 static int
113 aggr_grp_constructor(void *buf, void *arg, int kmflag)
114 {
115 	aggr_grp_t *grp = buf;
116 
117 	bzero(grp, sizeof (*grp));
118 	rw_init(&grp->lg_lock, NULL, RW_DRIVER, NULL);
119 	mutex_init(&grp->aggr.gl_lock, NULL, MUTEX_DEFAULT, NULL);
120 
121 	grp->lg_link_state = LINK_STATE_UNKNOWN;
122 
123 	return (0);
124 }
125 
126 /*ARGSUSED*/
127 static void
128 aggr_grp_destructor(void *buf, void *arg)
129 {
130 	aggr_grp_t *grp = buf;
131 
132 	if (grp->lg_tx_ports != NULL) {
133 		kmem_free(grp->lg_tx_ports,
134 		    grp->lg_tx_ports_size * sizeof (aggr_port_t *));
135 	}
136 
137 	mutex_destroy(&grp->aggr.gl_lock);
138 	rw_destroy(&grp->lg_lock);
139 }
140 
141 void
142 aggr_grp_init(void)
143 {
144 	aggr_grp_cache = kmem_cache_create("aggr_grp_cache",
145 	    sizeof (aggr_grp_t), 0, aggr_grp_constructor,
146 	    aggr_grp_destructor, NULL, NULL, NULL, 0);
147 
148 	aggr_grp_hash = mod_hash_create_idhash("aggr_grp_hash",
149 	    GRP_HASHSZ, mod_hash_null_valdtor);
150 	rw_init(&aggr_grp_lock, NULL, RW_DEFAULT, NULL);
151 	aggr_grp_cnt = 0;
152 
153 	/*
154 	 * Allocate an id space to manage key values (when key is not
155 	 * specified). The range of the id space will be from
156 	 * (AGGR_MAX_KEY + 1) to UINT16_MAX, because the LACP protocol
157 	 * uses a 16-bit key.
158 	 */
159 	key_ids = id_space_create("aggr_key_ids", AGGR_MAX_KEY + 1, UINT16_MAX);
160 	ASSERT(key_ids != NULL);
161 }
162 
163 void
164 aggr_grp_fini(void)
165 {
166 	id_space_destroy(key_ids);
167 	rw_destroy(&aggr_grp_lock);
168 	mod_hash_destroy_idhash(aggr_grp_hash);
169 	kmem_cache_destroy(aggr_grp_cache);
170 }
171 
172 uint_t
173 aggr_grp_count(void)
174 {
175 	uint_t	count;
176 
177 	rw_enter(&aggr_grp_lock, RW_READER);
178 	count = aggr_grp_cnt;
179 	rw_exit(&aggr_grp_lock);
180 	return (count);
181 }
182 
183 /*
184  * Attach a port to a link aggregation group.
185  *
186  * A port is attached to a link aggregation group once its speed
187  * and link state have been verified.
188  *
189  * Returns B_TRUE if the group link state or speed has changed. If
190  * it's the case, the caller must notify the MAC layer via a call
191  * to mac_link().
192  */
193 boolean_t
194 aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port)
195 {
196 	boolean_t link_state_changed = B_FALSE;
197 
198 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
199 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
200 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
201 
202 	if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
203 		return (B_FALSE);
204 
205 	/*
206 	 * Validate the MAC port link speed and update the group
207 	 * link speed if needed.
208 	 */
209 	if (port->lp_ifspeed == 0 ||
210 	    port->lp_link_state != LINK_STATE_UP ||
211 	    port->lp_link_duplex != LINK_DUPLEX_FULL) {
212 		/*
213 		 * Can't attach a MAC port with unknown link speed,
214 		 * down link, or not in full duplex mode.
215 		 */
216 		return (B_FALSE);
217 	}
218 
219 	if (grp->lg_ifspeed == 0) {
220 		/*
221 		 * The group inherits the speed of the first link being
222 		 * attached.
223 		 */
224 		grp->lg_ifspeed = port->lp_ifspeed;
225 		link_state_changed = B_TRUE;
226 	} else if (grp->lg_ifspeed != port->lp_ifspeed) {
227 		/*
228 		 * The link speed of the MAC port must be the same as
229 		 * the group link speed, as per 802.3ad. Since it is
230 		 * not, the attach is cancelled.
231 		 */
232 		return (B_FALSE);
233 	}
234 
235 	grp->lg_nattached_ports++;
236 
237 	/*
238 	 * Update the group link state.
239 	 */
240 	if (grp->lg_link_state != LINK_STATE_UP) {
241 		grp->lg_link_state = LINK_STATE_UP;
242 		grp->lg_link_duplex = LINK_DUPLEX_FULL;
243 		link_state_changed = B_TRUE;
244 	}
245 
246 	aggr_grp_multicst_port(port, B_TRUE);
247 
248 	/*
249 	 * Update port's state.
250 	 */
251 	port->lp_state = AGGR_PORT_STATE_ATTACHED;
252 
253 	/*
254 	 * Set port's receive callback
255 	 */
256 	port->lp_mrh = mac_rx_add(port->lp_mh, aggr_recv_cb, (void *)port);
257 
258 	/*
259 	 * If LACP is OFF, the port can be used to send data as soon
260 	 * as its link is up and verified to be compatible with the
261 	 * aggregation.
262 	 *
263 	 * If LACP is active or passive, notify the LACP subsystem, which
264 	 * will enable sending on the port following the LACP protocol.
265 	 */
266 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
267 		aggr_send_port_enable(port);
268 	else
269 		aggr_lacp_port_attached(port);
270 
271 	return (link_state_changed);
272 }
273 
274 boolean_t
275 aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port)
276 {
277 	boolean_t link_state_changed = B_FALSE;
278 
279 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
280 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
281 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
282 
283 	/* update state */
284 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
285 		return (B_FALSE);
286 
287 	mac_rx_remove(port->lp_mh, port->lp_mrh, B_FALSE);
288 	port->lp_state = AGGR_PORT_STATE_STANDBY;
289 
290 	aggr_grp_multicst_port(port, B_FALSE);
291 
292 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
293 		aggr_send_port_disable(port);
294 	else
295 		aggr_lacp_port_detached(port);
296 
297 	grp->lg_nattached_ports--;
298 	if (grp->lg_nattached_ports == 0) {
299 		/* the last attached MAC port of the group is being detached */
300 		grp->lg_ifspeed = 0;
301 		grp->lg_link_state = LINK_STATE_DOWN;
302 		grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
303 		link_state_changed = B_TRUE;
304 	}
305 
306 	return (link_state_changed);
307 }
308 
309 /*
310  * Update the MAC addresses of the constituent ports of the specified
311  * group. This function is invoked:
312  * - after creating a new aggregation group.
313  * - after adding new ports to an aggregation group.
314  * - after removing a port from a group when the MAC address of
315  *   that port was used for the MAC address of the group.
316  * - after the MAC address of a port changed when the MAC address
317  *   of that port was used for the MAC address of the group.
318  *
319  * Return true if the link state of the aggregation changed, for example
320  * as a result of a failure changing the MAC address of one of the
321  * constituent ports.
322  */
323 boolean_t
324 aggr_grp_update_ports_mac(aggr_grp_t *grp)
325 {
326 	aggr_port_t *cport;
327 	boolean_t link_state_changed = B_FALSE;
328 
329 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
330 
331 	if (grp->lg_closing)
332 		return (link_state_changed);
333 
334 	for (cport = grp->lg_ports; cport != NULL;
335 	    cport = cport->lp_next) {
336 		rw_enter(&cport->lp_lock, RW_WRITER);
337 		if (aggr_port_unicst(cport, grp->lg_addr) != 0) {
338 			if (aggr_grp_detach_port(grp, cport))
339 				link_state_changed = B_TRUE;
340 		} else {
341 			/*
342 			 * If a port was detached because of a previous
343 			 * failure changing the MAC address, the port is
344 			 * reattached when it successfully changes the MAC
345 			 * address now, and this might cause the link state
346 			 * of the aggregation to change.
347 			 */
348 			if (aggr_grp_attach_port(grp, cport))
349 				link_state_changed = B_TRUE;
350 		}
351 		rw_exit(&cport->lp_lock);
352 	}
353 	return (link_state_changed);
354 }
355 
356 /*
357  * Invoked when the MAC address of a port has changed. If the port's
358  * MAC address was used for the group MAC address, set mac_addr_changedp
359  * to B_TRUE to indicate to the caller that it should send a MAC_NOTE_UNICST
360  * notification. If the link state changes due to detach/attach of
361  * the constituent port, set link_state_changedp to B_TRUE to indicate
362  * to the caller that it should send a MAC_NOTE_LINK notification. In both
363  * cases, it is the responsibility of the caller to invoke notification
364  * functions after releasing the the port lock.
365  */
366 void
367 aggr_grp_port_mac_changed(aggr_grp_t *grp, aggr_port_t *port,
368     boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
369 {
370 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
371 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
372 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
373 	ASSERT(mac_addr_changedp != NULL);
374 	ASSERT(link_state_changedp != NULL);
375 
376 	*mac_addr_changedp = B_FALSE;
377 	*link_state_changedp = B_FALSE;
378 
379 	if (grp->lg_addr_fixed) {
380 		/*
381 		 * The group is using a fixed MAC address or an automatic
382 		 * MAC address has not been set.
383 		 */
384 		return;
385 	}
386 
387 	if (grp->lg_mac_addr_port == port) {
388 		/*
389 		 * The MAC address of the port was assigned to the group
390 		 * MAC address. Update the group MAC address.
391 		 */
392 		bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
393 		*mac_addr_changedp = B_TRUE;
394 	} else {
395 		/*
396 		 * Update the actual port MAC address to the MAC address
397 		 * of the group.
398 		 */
399 		if (aggr_port_unicst(port, grp->lg_addr) != 0) {
400 			*link_state_changedp = aggr_grp_detach_port(grp, port);
401 		} else {
402 			/*
403 			 * If a port was detached because of a previous
404 			 * failure changing the MAC address, the port is
405 			 * reattached when it successfully changes the MAC
406 			 * address now, and this might cause the link state
407 			 * of the aggregation to change.
408 			 */
409 			*link_state_changedp = aggr_grp_attach_port(grp, port);
410 		}
411 	}
412 }
413 
414 /*
415  * Add a port to a link aggregation group.
416  */
417 static int
418 aggr_grp_add_port(aggr_grp_t *grp, datalink_id_t linkid, boolean_t force,
419     aggr_port_t **pp)
420 {
421 	aggr_port_t *port, **cport;
422 	int err;
423 
424 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
425 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
426 
427 	/* create new port */
428 	err = aggr_port_create(linkid, force, &port);
429 	if (err != 0)
430 		return (err);
431 
432 	rw_enter(&port->lp_lock, RW_WRITER);
433 
434 	/* add port to list of group constituent ports */
435 	cport = &grp->lg_ports;
436 	while (*cport != NULL)
437 		cport = &((*cport)->lp_next);
438 	*cport = port;
439 
440 	/*
441 	 * Back reference to the group it is member of. A port always
442 	 * holds a reference to its group to ensure that the back
443 	 * reference is always valid.
444 	 */
445 	port->lp_grp = grp;
446 	AGGR_GRP_REFHOLD(grp);
447 	grp->lg_nports++;
448 
449 	aggr_lacp_init_port(port);
450 
451 	/*
452 	 * Initialize the callback functions for this port. Note that this
453 	 * can only be done after the lp_grp field is set.
454 	 */
455 	aggr_port_init_callbacks(port);
456 
457 	rw_exit(&port->lp_lock);
458 
459 	if (pp != NULL)
460 		*pp = port;
461 
462 	return (0);
463 }
464 
465 /*
466  * Add one or more ports to an existing link aggregation group.
467  */
468 int
469 aggr_grp_add_ports(datalink_id_t linkid, uint_t nports, boolean_t force,
470     laioc_port_t *ports)
471 {
472 	int rc, i, nadded = 0;
473 	aggr_grp_t *grp = NULL;
474 	aggr_port_t *port;
475 	boolean_t link_state_changed = B_FALSE;
476 
477 	/* get group corresponding to linkid */
478 	rw_enter(&aggr_grp_lock, RW_READER);
479 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
480 	    (mod_hash_val_t *)&grp) != 0) {
481 		rw_exit(&aggr_grp_lock);
482 		return (ENOENT);
483 	}
484 	AGGR_GRP_REFHOLD(grp);
485 	rw_exit(&aggr_grp_lock);
486 
487 	AGGR_LACP_LOCK(grp);
488 	rw_enter(&grp->lg_lock, RW_WRITER);
489 
490 	/* add the specified ports to group */
491 	for (i = 0; i < nports; i++) {
492 		/* add port to group */
493 		if ((rc = aggr_grp_add_port(grp, ports[i].lp_linkid,
494 		    force, &port)) != 0) {
495 			goto bail;
496 		}
497 		ASSERT(port != NULL);
498 		nadded++;
499 
500 		/* check capabilities */
501 		if (!aggr_grp_capab_check(grp, port) ||
502 		    !aggr_grp_sdu_check(grp, port) ||
503 		    !aggr_grp_margin_check(grp, port)) {
504 			rc = ENOTSUP;
505 			goto bail;
506 		}
507 
508 		/* start port if group has already been started */
509 		if (grp->lg_started) {
510 			rw_enter(&port->lp_lock, RW_WRITER);
511 			rc = aggr_port_start(port);
512 			if (rc != 0) {
513 				rw_exit(&port->lp_lock);
514 				goto bail;
515 			}
516 
517 			/* set port promiscuous mode */
518 			rc = aggr_port_promisc(port, grp->lg_promisc);
519 			if (rc != 0) {
520 				rw_exit(&port->lp_lock);
521 				goto bail;
522 			}
523 			rw_exit(&port->lp_lock);
524 		}
525 
526 		/*
527 		 * Attach each port if necessary.
528 		 */
529 		if (aggr_port_notify_link(grp, port, B_FALSE))
530 			link_state_changed = B_TRUE;
531 	}
532 
533 	/* update the MAC address of the constituent ports */
534 	if (aggr_grp_update_ports_mac(grp))
535 		link_state_changed = B_TRUE;
536 
537 	if (link_state_changed)
538 		mac_link_update(grp->lg_mh, grp->lg_link_state);
539 
540 bail:
541 	if (rc != 0) {
542 		/* stop and remove ports that have been added */
543 		for (i = 0; i < nadded && !grp->lg_closing; i++) {
544 			port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
545 			ASSERT(port != NULL);
546 			if (grp->lg_started) {
547 				rw_enter(&port->lp_lock, RW_WRITER);
548 				aggr_port_stop(port);
549 				rw_exit(&port->lp_lock);
550 			}
551 			(void) aggr_grp_rem_port(grp, port, NULL, NULL);
552 		}
553 	}
554 
555 	rw_exit(&grp->lg_lock);
556 	AGGR_LACP_UNLOCK(grp);
557 	if (rc == 0 && !grp->lg_closing)
558 		mac_resource_update(grp->lg_mh);
559 	AGGR_GRP_REFRELE(grp);
560 	return (rc);
561 }
562 
563 /*
564  * Update properties of an existing link aggregation group.
565  */
566 int
567 aggr_grp_modify(datalink_id_t linkid, aggr_grp_t *grp_arg, uint8_t update_mask,
568     uint32_t policy, boolean_t mac_fixed, const uchar_t *mac_addr,
569     aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer)
570 {
571 	int rc = 0;
572 	aggr_grp_t *grp = NULL;
573 	boolean_t mac_addr_changed = B_FALSE;
574 	boolean_t link_state_changed = B_FALSE;
575 
576 	if (grp_arg == NULL) {
577 		/* get group corresponding to linkid */
578 		rw_enter(&aggr_grp_lock, RW_READER);
579 		if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
580 		    (mod_hash_val_t *)&grp) != 0) {
581 			rc = ENOENT;
582 			goto bail;
583 		}
584 		AGGR_LACP_LOCK(grp);
585 		rw_enter(&grp->lg_lock, RW_WRITER);
586 	} else {
587 		grp = grp_arg;
588 		ASSERT(AGGR_LACP_LOCK_HELD(grp));
589 		ASSERT(RW_WRITE_HELD(&grp->lg_lock));
590 	}
591 
592 	ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock));
593 	AGGR_GRP_REFHOLD(grp);
594 
595 	/* validate fixed address if specified */
596 	if ((update_mask & AGGR_MODIFY_MAC) && mac_fixed &&
597 	    ((bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) ||
598 	    (mac_addr[0] & 0x01))) {
599 		rc = EINVAL;
600 		goto bail;
601 	}
602 
603 	/* update policy if requested */
604 	if (update_mask & AGGR_MODIFY_POLICY)
605 		aggr_send_update_policy(grp, policy);
606 
607 	/* update unicast MAC address if requested */
608 	if (update_mask & AGGR_MODIFY_MAC) {
609 		if (mac_fixed) {
610 			/* user-supplied MAC address */
611 			grp->lg_mac_addr_port = NULL;
612 			if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) != 0) {
613 				bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
614 				mac_addr_changed = B_TRUE;
615 			}
616 		} else if (grp->lg_addr_fixed) {
617 			/* switch from user-supplied to automatic */
618 			aggr_port_t *port = grp->lg_ports;
619 
620 			rw_enter(&port->lp_lock, RW_WRITER);
621 			bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
622 			grp->lg_mac_addr_port = port;
623 			mac_addr_changed = B_TRUE;
624 			rw_exit(&port->lp_lock);
625 		}
626 		grp->lg_addr_fixed = mac_fixed;
627 	}
628 
629 	if (mac_addr_changed)
630 		link_state_changed = aggr_grp_update_ports_mac(grp);
631 
632 	if (update_mask & AGGR_MODIFY_LACP_MODE)
633 		aggr_lacp_update_mode(grp, lacp_mode);
634 
635 	if ((update_mask & AGGR_MODIFY_LACP_TIMER) && !grp->lg_closing)
636 		aggr_lacp_update_timer(grp, lacp_timer);
637 
638 bail:
639 	if (grp != NULL && !grp->lg_closing) {
640 		/*
641 		 * If grp_arg is non-NULL, this function is called from
642 		 * mac_unicst_set(), and the MAC_NOTE_UNICST notification
643 		 * will be sent there.
644 		 */
645 		if ((grp_arg == NULL) && mac_addr_changed)
646 			mac_unicst_update(grp->lg_mh, grp->lg_addr);
647 
648 		if (link_state_changed)
649 			mac_link_update(grp->lg_mh, grp->lg_link_state);
650 
651 	}
652 
653 	if (grp_arg == NULL) {
654 		if (grp != NULL) {
655 			rw_exit(&grp->lg_lock);
656 			AGGR_LACP_UNLOCK(grp);
657 		}
658 		rw_exit(&aggr_grp_lock);
659 	}
660 
661 	if (grp != NULL)
662 		AGGR_GRP_REFRELE(grp);
663 
664 	return (rc);
665 }
666 
667 /*
668  * Create a new link aggregation group upon request from administrator.
669  * Returns 0 on success, an errno on failure.
670  */
671 int
672 aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports,
673     laioc_port_t *ports, uint32_t policy, boolean_t mac_fixed, boolean_t force,
674     uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer)
675 {
676 	aggr_grp_t *grp = NULL;
677 	aggr_port_t *port;
678 	mac_register_t *mac;
679 	boolean_t link_state_changed;
680 	int err;
681 	int i;
682 
683 	/* need at least one port */
684 	if (nports == 0)
685 		return (EINVAL);
686 
687 	rw_enter(&aggr_grp_lock, RW_WRITER);
688 
689 	/* does a group with the same linkid already exist? */
690 	err = mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
691 	    (mod_hash_val_t *)&grp);
692 	if (err == 0) {
693 		rw_exit(&aggr_grp_lock);
694 		return (EEXIST);
695 	}
696 
697 	grp = kmem_cache_alloc(aggr_grp_cache, KM_SLEEP);
698 
699 	AGGR_LACP_LOCK(grp);
700 	rw_enter(&grp->lg_lock, RW_WRITER);
701 
702 	grp->lg_refs = 1;
703 	grp->lg_closing = B_FALSE;
704 	grp->lg_force = force;
705 	grp->lg_linkid = linkid;
706 	grp->lg_ifspeed = 0;
707 	grp->lg_link_state = LINK_STATE_UNKNOWN;
708 	grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
709 	grp->lg_started = B_FALSE;
710 	grp->lg_promisc = B_FALSE;
711 	aggr_lacp_init_grp(grp);
712 
713 	/* add MAC ports to group */
714 	grp->lg_ports = NULL;
715 	grp->lg_nports = 0;
716 	grp->lg_nattached_ports = 0;
717 	grp->lg_ntx_ports = 0;
718 
719 	/*
720 	 * If key is not specified by the user, allocate the key.
721 	 */
722 	if ((key == 0) && ((key = (uint32_t)id_alloc(key_ids)) == 0)) {
723 		err = ENOMEM;
724 		goto bail;
725 	}
726 	grp->lg_key = key;
727 	grp->lg_mcst_list = NULL;
728 
729 	for (i = 0; i < nports; i++) {
730 		err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, NULL);
731 		if (err != 0)
732 			goto bail;
733 	}
734 
735 	/*
736 	 * If no explicit MAC address was specified by the administrator,
737 	 * set it to the MAC address of the first port.
738 	 */
739 	grp->lg_addr_fixed = mac_fixed;
740 	if (grp->lg_addr_fixed) {
741 		/* validate specified address */
742 		if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) {
743 			err = EINVAL;
744 			goto bail;
745 		}
746 		bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
747 	} else {
748 		bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
749 		grp->lg_mac_addr_port = grp->lg_ports;
750 	}
751 
752 	/*
753 	 * Update the MAC address of the constituent ports.
754 	 * None of the port is attached at this time, the link state of the
755 	 * aggregation will not change.
756 	 */
757 	link_state_changed = aggr_grp_update_ports_mac(grp);
758 	ASSERT(!link_state_changed);
759 
760 	/* update outbound load balancing policy */
761 	aggr_send_update_policy(grp, policy);
762 
763 	/* set the initial group capabilities */
764 	aggr_grp_capab_set(grp);
765 
766 	if ((mac = mac_alloc(MAC_VERSION)) == NULL) {
767 		err = ENOMEM;
768 		goto bail;
769 	}
770 	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
771 	mac->m_driver = grp;
772 	mac->m_dip = aggr_dip;
773 	mac->m_instance = grp->lg_key > AGGR_MAX_KEY ? (uint_t)-1 : grp->lg_key;
774 	mac->m_src_addr = grp->lg_addr;
775 	mac->m_callbacks = &aggr_m_callbacks;
776 	mac->m_min_sdu = 0;
777 	mac->m_max_sdu = grp->lg_max_sdu = aggr_grp_max_sdu(grp);
778 	mac->m_margin = aggr_grp_max_margin(grp);
779 	err = mac_register(mac, &grp->lg_mh);
780 	mac_free(mac);
781 	if (err != 0)
782 		goto bail;
783 
784 	if ((err = dls_devnet_create(grp->lg_mh, grp->lg_linkid)) != 0) {
785 		(void) mac_unregister(grp->lg_mh);
786 		goto bail;
787 	}
788 
789 	/* set LACP mode */
790 	aggr_lacp_set_mode(grp, lacp_mode, lacp_timer);
791 
792 	/*
793 	 * Attach each port if necessary.
794 	 */
795 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
796 		if (aggr_port_notify_link(grp, port, B_FALSE))
797 			link_state_changed = B_TRUE;
798 	}
799 
800 	if (link_state_changed)
801 		mac_link_update(grp->lg_mh, grp->lg_link_state);
802 
803 	/* add new group to hash table */
804 	err = mod_hash_insert(aggr_grp_hash, GRP_HASH_KEY(linkid),
805 	    (mod_hash_val_t)grp);
806 	ASSERT(err == 0);
807 	aggr_grp_cnt++;
808 
809 	rw_exit(&grp->lg_lock);
810 	AGGR_LACP_UNLOCK(grp);
811 	rw_exit(&aggr_grp_lock);
812 	return (0);
813 
814 bail:
815 	if (grp != NULL) {
816 		aggr_port_t *cport;
817 
818 		grp->lg_closing = B_TRUE;
819 
820 		port = grp->lg_ports;
821 		while (port != NULL) {
822 			cport = port->lp_next;
823 			aggr_port_delete(port);
824 			port = cport;
825 		}
826 
827 		rw_exit(&grp->lg_lock);
828 		AGGR_LACP_UNLOCK(grp);
829 
830 		AGGR_GRP_REFRELE(grp);
831 	}
832 
833 	rw_exit(&aggr_grp_lock);
834 	return (err);
835 }
836 
837 /*
838  * Return a pointer to the member of a group with specified linkid.
839  */
840 static aggr_port_t *
841 aggr_grp_port_lookup(aggr_grp_t *grp, datalink_id_t linkid)
842 {
843 	aggr_port_t *port;
844 
845 	ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock));
846 
847 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
848 		if (port->lp_linkid == linkid)
849 			break;
850 	}
851 
852 	return (port);
853 }
854 
855 /*
856  * Stop, detach and remove a port from a link aggregation group.
857  */
858 static int
859 aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port,
860     boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
861 {
862 	int rc = 0;
863 	aggr_port_t **pport;
864 	boolean_t mac_addr_changed = B_FALSE;
865 	boolean_t link_state_changed = B_FALSE;
866 	uint64_t val;
867 	uint_t i;
868 	uint_t stat;
869 
870 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
871 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
872 	ASSERT(grp->lg_nports > 1);
873 	ASSERT(!grp->lg_closing);
874 
875 	/* unlink port */
876 	for (pport = &grp->lg_ports; *pport != port;
877 	    pport = &(*pport)->lp_next) {
878 		if (*pport == NULL) {
879 			rc = ENOENT;
880 			goto done;
881 		}
882 	}
883 	*pport = port->lp_next;
884 
885 	atomic_add_32(&port->lp_closing, 1);
886 
887 	rw_enter(&port->lp_lock, RW_WRITER);
888 
889 	/*
890 	 * If the MAC address of the port being removed was assigned
891 	 * to the group, update the group MAC address
892 	 * using the MAC address of a different port.
893 	 */
894 	if (!grp->lg_addr_fixed && grp->lg_mac_addr_port == port) {
895 		/*
896 		 * Set the MAC address of the group to the
897 		 * MAC address of its first port.
898 		 */
899 		bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
900 		grp->lg_mac_addr_port = grp->lg_ports;
901 		mac_addr_changed = B_TRUE;
902 	}
903 
904 	link_state_changed = aggr_grp_detach_port(grp, port);
905 
906 	/*
907 	 * Add the counter statistics of the ports while it was aggregated
908 	 * to the group's residual statistics.  This is done by obtaining
909 	 * the current counter from the underlying MAC then subtracting the
910 	 * value of the counter at the moment it was added to the
911 	 * aggregation.
912 	 */
913 	for (i = 0; i < MAC_NSTAT && !grp->lg_closing; i++) {
914 		stat = i + MAC_STAT_MIN;
915 		if (!MAC_STAT_ISACOUNTER(stat))
916 			continue;
917 		val = aggr_port_stat(port, stat);
918 		val -= port->lp_stat[i];
919 		grp->lg_stat[i] += val;
920 	}
921 	for (i = 0; i < ETHER_NSTAT && !grp->lg_closing; i++) {
922 		stat = i + MACTYPE_STAT_MIN;
923 		if (!ETHER_STAT_ISACOUNTER(stat))
924 			continue;
925 		val = aggr_port_stat(port, stat);
926 		val -= port->lp_ether_stat[i];
927 		grp->lg_ether_stat[i] += val;
928 	}
929 
930 	grp->lg_nports--;
931 
932 	rw_exit(&port->lp_lock);
933 
934 	aggr_port_delete(port);
935 
936 	/*
937 	 * If the group MAC address has changed, update the MAC address of
938 	 * the remaining constituent ports according to the new MAC
939 	 * address of the group.
940 	 */
941 	if (mac_addr_changed && aggr_grp_update_ports_mac(grp))
942 		link_state_changed = B_TRUE;
943 
944 done:
945 	if (mac_addr_changedp != NULL)
946 		*mac_addr_changedp = mac_addr_changed;
947 	if (link_state_changedp != NULL)
948 		*link_state_changedp = link_state_changed;
949 
950 	return (rc);
951 }
952 
953 /*
954  * Remove one or more ports from an existing link aggregation group.
955  */
956 int
957 aggr_grp_rem_ports(datalink_id_t linkid, uint_t nports, laioc_port_t *ports)
958 {
959 	int rc = 0, i;
960 	aggr_grp_t *grp = NULL;
961 	aggr_port_t *port;
962 	boolean_t mac_addr_update = B_FALSE, mac_addr_changed;
963 	boolean_t link_state_update = B_FALSE, link_state_changed;
964 
965 	/* get group corresponding to linkid */
966 	rw_enter(&aggr_grp_lock, RW_READER);
967 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
968 	    (mod_hash_val_t *)&grp) != 0) {
969 		rw_exit(&aggr_grp_lock);
970 		return (ENOENT);
971 	}
972 	AGGR_GRP_REFHOLD(grp);
973 	rw_exit(&aggr_grp_lock);
974 
975 	AGGR_LACP_LOCK(grp);
976 	rw_enter(&grp->lg_lock, RW_WRITER);
977 
978 	/* we need to keep at least one port per group */
979 	if (nports >= grp->lg_nports) {
980 		rc = EINVAL;
981 		goto bail;
982 	}
983 
984 	/* first verify that all the groups are valid */
985 	for (i = 0; i < nports; i++) {
986 		if (aggr_grp_port_lookup(grp, ports[i].lp_linkid) == NULL) {
987 			/* port not found */
988 			rc = ENOENT;
989 			goto bail;
990 		}
991 	}
992 
993 	/* remove the specified ports from group */
994 	for (i = 0; i < nports && !grp->lg_closing; i++) {
995 		/* lookup port */
996 		port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
997 		ASSERT(port != NULL);
998 
999 		/* stop port if group has already been started */
1000 		if (grp->lg_started) {
1001 			rw_enter(&port->lp_lock, RW_WRITER);
1002 			aggr_port_stop(port);
1003 			rw_exit(&port->lp_lock);
1004 		}
1005 
1006 		/* remove port from group */
1007 		rc = aggr_grp_rem_port(grp, port, &mac_addr_changed,
1008 		    &link_state_changed);
1009 		ASSERT(rc == 0);
1010 		mac_addr_update = mac_addr_update || mac_addr_changed;
1011 		link_state_update = link_state_update || link_state_changed;
1012 	}
1013 
1014 bail:
1015 	rw_exit(&grp->lg_lock);
1016 	AGGR_LACP_UNLOCK(grp);
1017 	if (!grp->lg_closing) {
1018 		if (mac_addr_update)
1019 			mac_unicst_update(grp->lg_mh, grp->lg_addr);
1020 		if (link_state_update)
1021 			mac_link_update(grp->lg_mh, grp->lg_link_state);
1022 		if (rc == 0)
1023 			mac_resource_update(grp->lg_mh);
1024 	}
1025 	AGGR_GRP_REFRELE(grp);
1026 
1027 	return (rc);
1028 }
1029 
1030 int
1031 aggr_grp_delete(datalink_id_t linkid)
1032 {
1033 	aggr_grp_t *grp = NULL;
1034 	aggr_port_t *port, *cport;
1035 	lg_mcst_addr_t *mcst, *mcst_nextp;
1036 	datalink_id_t tmpid;
1037 	mod_hash_val_t val;
1038 	int err;
1039 
1040 	rw_enter(&aggr_grp_lock, RW_WRITER);
1041 
1042 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1043 	    (mod_hash_val_t *)&grp) != 0) {
1044 		rw_exit(&aggr_grp_lock);
1045 		return (ENOENT);
1046 	}
1047 
1048 	/*
1049 	 * Note that dls_devnet_destroy() must be called before lg_lock is
1050 	 * held. Otherwise, it will deadlock if another thread is in
1051 	 * aggr_m_stat() and thus has a kstat_hold() on the kstats that
1052 	 * dls_devnet_destroy() needs to delete.
1053 	 */
1054 	if ((err = dls_devnet_destroy(grp->lg_mh, &tmpid)) != 0) {
1055 		rw_exit(&aggr_grp_lock);
1056 		return (err);
1057 	}
1058 	ASSERT(linkid == tmpid);
1059 
1060 	AGGR_LACP_LOCK(grp);
1061 	rw_enter(&grp->lg_lock, RW_WRITER);
1062 
1063 	/*
1064 	 * Unregister from the MAC service module. Since this can
1065 	 * fail if a client hasn't closed the MAC port, we gracefully
1066 	 * fail the operation.
1067 	 */
1068 	grp->lg_closing = B_TRUE;
1069 	if ((err = mac_disable(grp->lg_mh)) != 0) {
1070 		grp->lg_closing = B_FALSE;
1071 		rw_exit(&grp->lg_lock);
1072 		AGGR_LACP_UNLOCK(grp);
1073 
1074 		(void) dls_devnet_create(grp->lg_mh, linkid);
1075 		rw_exit(&aggr_grp_lock);
1076 		return (err);
1077 	}
1078 
1079 	/*
1080 	 * Free the list of multicast addresses.
1081 	 */
1082 	for (mcst = grp->lg_mcst_list; mcst != NULL; mcst = mcst_nextp) {
1083 		mcst_nextp = mcst->lg_mcst_nextp;
1084 		kmem_free(mcst, sizeof (lg_mcst_addr_t));
1085 	}
1086 	grp->lg_mcst_list = NULL;
1087 
1088 	/* detach and free MAC ports associated with group */
1089 	port = grp->lg_ports;
1090 	while (port != NULL) {
1091 		cport = port->lp_next;
1092 		rw_enter(&port->lp_lock, RW_WRITER);
1093 		if (grp->lg_started)
1094 			aggr_port_stop(port);
1095 		(void) aggr_grp_detach_port(grp, port);
1096 		rw_exit(&port->lp_lock);
1097 		aggr_port_delete(port);
1098 		port = cport;
1099 	}
1100 
1101 	VERIFY(mac_unregister(grp->lg_mh) == 0);
1102 
1103 	rw_exit(&grp->lg_lock);
1104 	AGGR_LACP_UNLOCK(grp);
1105 
1106 	(void) mod_hash_remove(aggr_grp_hash, GRP_HASH_KEY(linkid), &val);
1107 	ASSERT(grp == (aggr_grp_t *)val);
1108 
1109 	ASSERT(aggr_grp_cnt > 0);
1110 	aggr_grp_cnt--;
1111 
1112 	rw_exit(&aggr_grp_lock);
1113 	AGGR_GRP_REFRELE(grp);
1114 
1115 	return (0);
1116 }
1117 
1118 void
1119 aggr_grp_free(aggr_grp_t *grp)
1120 {
1121 	ASSERT(grp->lg_refs == 0);
1122 	if (grp->lg_key > AGGR_MAX_KEY) {
1123 		id_free(key_ids, grp->lg_key);
1124 		grp->lg_key = 0;
1125 	}
1126 	kmem_cache_free(aggr_grp_cache, grp);
1127 }
1128 
1129 int
1130 aggr_grp_info(datalink_id_t linkid, void *fn_arg,
1131     aggr_grp_info_new_grp_fn_t new_grp_fn,
1132     aggr_grp_info_new_port_fn_t new_port_fn)
1133 {
1134 	aggr_grp_t	*grp;
1135 	aggr_port_t	*port;
1136 	int		rc = 0;
1137 
1138 	rw_enter(&aggr_grp_lock, RW_READER);
1139 
1140 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1141 	    (mod_hash_val_t *)&grp) != 0) {
1142 		rw_exit(&aggr_grp_lock);
1143 		return (ENOENT);
1144 	}
1145 
1146 	rw_enter(&grp->lg_lock, RW_READER);
1147 
1148 	rc = new_grp_fn(fn_arg, grp->lg_linkid,
1149 	    (grp->lg_key > AGGR_MAX_KEY) ? 0 : grp->lg_key, grp->lg_addr,
1150 	    grp->lg_addr_fixed, grp->lg_force, grp->lg_tx_policy,
1151 	    grp->lg_nports, grp->lg_lacp_mode, grp->aggr.PeriodicTimer);
1152 
1153 	if (rc != 0)
1154 		goto bail;
1155 
1156 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1157 		rw_enter(&port->lp_lock, RW_READER);
1158 		rc = new_port_fn(fn_arg, port->lp_linkid, port->lp_addr,
1159 		    port->lp_state, &port->lp_lacp.ActorOperPortState);
1160 		rw_exit(&port->lp_lock);
1161 
1162 		if (rc != 0)
1163 			goto bail;
1164 	}
1165 
1166 bail:
1167 	rw_exit(&grp->lg_lock);
1168 	rw_exit(&aggr_grp_lock);
1169 	return (rc);
1170 }
1171 
1172 static void
1173 aggr_m_resources(void *arg)
1174 {
1175 	aggr_grp_t *grp = arg;
1176 	aggr_port_t *port;
1177 
1178 	/* Call each port's m_resources function */
1179 	for (port = grp->lg_ports; port != NULL; port = port->lp_next)
1180 		mac_resources(port->lp_mh);
1181 }
1182 
1183 /*ARGSUSED*/
1184 static void
1185 aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
1186 {
1187 	miocnak(q, mp, 0, ENOTSUP);
1188 }
1189 
1190 static int
1191 aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val)
1192 {
1193 	aggr_port_t	*port;
1194 	uint_t		stat_index;
1195 
1196 	/* We only aggregate counter statistics. */
1197 	if (IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat) ||
1198 	    IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat)) {
1199 		return (ENOTSUP);
1200 	}
1201 
1202 	/*
1203 	 * Counter statistics for a group are computed by aggregating the
1204 	 * counters of the members MACs while they were aggregated, plus
1205 	 * the residual counter of the group itself, which is updated each
1206 	 * time a MAC is removed from the group.
1207 	 */
1208 	*val = 0;
1209 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1210 		/* actual port statistic */
1211 		*val += aggr_port_stat(port, stat);
1212 		/*
1213 		 * minus the port stat when it was added, plus any residual
1214 		 * amount for the group.
1215 		 */
1216 		if (IS_MAC_STAT(stat)) {
1217 			stat_index = stat - MAC_STAT_MIN;
1218 			*val -= port->lp_stat[stat_index];
1219 			*val += grp->lg_stat[stat_index];
1220 		} else if (IS_MACTYPE_STAT(stat)) {
1221 			stat_index = stat - MACTYPE_STAT_MIN;
1222 			*val -= port->lp_ether_stat[stat_index];
1223 			*val += grp->lg_ether_stat[stat_index];
1224 		}
1225 	}
1226 	return (0);
1227 }
1228 
1229 static int
1230 aggr_m_stat(void *arg, uint_t stat, uint64_t *val)
1231 {
1232 	aggr_grp_t	*grp = arg;
1233 	int		rval = 0;
1234 
1235 	rw_enter(&grp->lg_lock, RW_READER);
1236 
1237 	switch (stat) {
1238 	case MAC_STAT_IFSPEED:
1239 		*val = grp->lg_ifspeed;
1240 		break;
1241 
1242 	case ETHER_STAT_LINK_DUPLEX:
1243 		*val = grp->lg_link_duplex;
1244 		break;
1245 
1246 	default:
1247 		/*
1248 		 * For all other statistics, we return the aggregated stat
1249 		 * from the underlying ports.  aggr_grp_stat() will set
1250 		 * rval appropriately if the statistic isn't a counter.
1251 		 */
1252 		rval = aggr_grp_stat(grp, stat, val);
1253 	}
1254 
1255 	rw_exit(&grp->lg_lock);
1256 	return (rval);
1257 }
1258 
1259 static int
1260 aggr_m_start(void *arg)
1261 {
1262 	aggr_grp_t *grp = arg;
1263 	aggr_port_t *port;
1264 
1265 	AGGR_LACP_LOCK(grp);
1266 	rw_enter(&grp->lg_lock, RW_WRITER);
1267 
1268 	/*
1269 	 * Attempts to start all configured members of the group.
1270 	 * Group members will be attached when their link-up notification
1271 	 * is received.
1272 	 */
1273 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1274 		rw_enter(&port->lp_lock, RW_WRITER);
1275 		if (aggr_port_start(port) != 0) {
1276 			rw_exit(&port->lp_lock);
1277 			continue;
1278 		}
1279 
1280 		/* set port promiscuous mode */
1281 		if (aggr_port_promisc(port, grp->lg_promisc) != 0)
1282 			aggr_port_stop(port);
1283 		rw_exit(&port->lp_lock);
1284 	}
1285 
1286 	grp->lg_started = B_TRUE;
1287 
1288 	rw_exit(&grp->lg_lock);
1289 	AGGR_LACP_UNLOCK(grp);
1290 
1291 	return (0);
1292 }
1293 
1294 static void
1295 aggr_m_stop(void *arg)
1296 {
1297 	aggr_grp_t *grp = arg;
1298 	aggr_port_t *port;
1299 
1300 	rw_enter(&grp->lg_lock, RW_WRITER);
1301 
1302 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1303 		rw_enter(&port->lp_lock, RW_WRITER);
1304 		aggr_port_stop(port);
1305 		rw_exit(&port->lp_lock);
1306 	}
1307 
1308 	grp->lg_started = B_FALSE;
1309 
1310 	rw_exit(&grp->lg_lock);
1311 }
1312 
1313 static int
1314 aggr_m_promisc(void *arg, boolean_t on)
1315 {
1316 	aggr_grp_t *grp = arg;
1317 	aggr_port_t *port;
1318 	boolean_t link_state_changed = B_FALSE;
1319 
1320 	AGGR_LACP_LOCK(grp);
1321 	rw_enter(&grp->lg_lock, RW_WRITER);
1322 	AGGR_GRP_REFHOLD(grp);
1323 
1324 	ASSERT(!grp->lg_closing);
1325 
1326 	if (on == grp->lg_promisc)
1327 		goto bail;
1328 
1329 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1330 		rw_enter(&port->lp_lock, RW_WRITER);
1331 		AGGR_PORT_REFHOLD(port);
1332 		if (port->lp_started) {
1333 			if (aggr_port_promisc(port, on) != 0) {
1334 				if (aggr_grp_detach_port(grp, port))
1335 					link_state_changed = B_TRUE;
1336 			} else {
1337 				/*
1338 				 * If a port was detached because of a previous
1339 				 * failure changing the promiscuity, the port
1340 				 * is reattached when it successfully changes
1341 				 * the promiscuity now, and this might cause
1342 				 * the link state of the aggregation to change.
1343 				 */
1344 				if (aggr_grp_attach_port(grp, port))
1345 					link_state_changed = B_TRUE;
1346 			}
1347 		}
1348 		rw_exit(&port->lp_lock);
1349 		AGGR_PORT_REFRELE(port);
1350 	}
1351 
1352 	grp->lg_promisc = on;
1353 
1354 	if (link_state_changed)
1355 		mac_link_update(grp->lg_mh, grp->lg_link_state);
1356 
1357 bail:
1358 	rw_exit(&grp->lg_lock);
1359 	AGGR_LACP_UNLOCK(grp);
1360 	AGGR_GRP_REFRELE(grp);
1361 
1362 	return (0);
1363 }
1364 
1365 /*
1366  * Initialize the capabilities that are advertised for the group
1367  * according to the capabilities of the constituent ports.
1368  */
1369 static boolean_t
1370 aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
1371 {
1372 	aggr_grp_t *grp = arg;
1373 
1374 	switch (cap) {
1375 	case MAC_CAPAB_HCKSUM: {
1376 		uint32_t *hcksum_txflags = cap_data;
1377 		*hcksum_txflags = grp->lg_hcksum_txflags;
1378 		break;
1379 	}
1380 	case MAC_CAPAB_POLL:
1381 		/*
1382 		 * There's nothing for us to fill in, we simply return
1383 		 * B_TRUE or B_FALSE to represent the group's support
1384 		 * status for this capability.
1385 		 */
1386 		return (grp->lg_gldv3_polling);
1387 	case MAC_CAPAB_NO_NATIVEVLAN:
1388 		return (!grp->lg_vlan);
1389 	case MAC_CAPAB_NO_ZCOPY:
1390 		return (!grp->lg_zcopy);
1391 	default:
1392 		return (B_FALSE);
1393 	}
1394 	return (B_TRUE);
1395 }
1396 
1397 static int
1398 aggr_grp_multicst(aggr_grp_t *grp, boolean_t add, const uint8_t *addrp)
1399 {
1400 	lg_mcst_addr_t	*mcst, **ppmcst;
1401 
1402 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
1403 
1404 	for (ppmcst = &(grp->lg_mcst_list); (mcst = *ppmcst) != NULL;
1405 	    ppmcst = &(mcst->lg_mcst_nextp)) {
1406 		if (bcmp(mcst->lg_mcst_addr, addrp, MAXMACADDRLEN) == 0)
1407 			break;
1408 	}
1409 
1410 	if (add) {
1411 		if (mcst != NULL)
1412 			return (0);
1413 		mcst = kmem_zalloc(sizeof (lg_mcst_addr_t), KM_NOSLEEP);
1414 		if (mcst == NULL)
1415 			return (ENOMEM);
1416 		bcopy(addrp, mcst->lg_mcst_addr, MAXMACADDRLEN);
1417 		*ppmcst = mcst;
1418 	} else {
1419 		if (mcst == NULL)
1420 			return (ENOENT);
1421 		*ppmcst = mcst->lg_mcst_nextp;
1422 		kmem_free(mcst, sizeof (lg_mcst_addr_t));
1423 	}
1424 	return (0);
1425 }
1426 
1427 /*
1428  * Add or remove the multicast addresses that are defined for the group
1429  * to or from the specified port.
1430  * This function is called before stopping a port, before a port
1431  * is detached from a group, and when attaching a port to a group.
1432  */
1433 void
1434 aggr_grp_multicst_port(aggr_port_t *port, boolean_t add)
1435 {
1436 	aggr_grp_t *grp = port->lp_grp;
1437 	lg_mcst_addr_t	*mcst;
1438 
1439 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
1440 	ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock));
1441 
1442 	if (!port->lp_started)
1443 		return;
1444 
1445 	for (mcst = grp->lg_mcst_list; mcst != NULL;
1446 	    mcst = mcst->lg_mcst_nextp)
1447 		(void) aggr_port_multicst(port, add, mcst->lg_mcst_addr);
1448 }
1449 
1450 static int
1451 aggr_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
1452 {
1453 	aggr_grp_t *grp = arg;
1454 	aggr_port_t *port = NULL;
1455 	int err = 0, cerr;
1456 
1457 	rw_enter(&grp->lg_lock, RW_WRITER);
1458 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1459 		if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
1460 			continue;
1461 		cerr = aggr_port_multicst(port, add, addrp);
1462 		if (cerr == 0)
1463 			(void) aggr_grp_multicst(grp, add, addrp);
1464 		if (cerr != 0 && err == 0)
1465 			err = cerr;
1466 	}
1467 	rw_exit(&grp->lg_lock);
1468 	return (err);
1469 }
1470 
1471 static int
1472 aggr_m_unicst(void *arg, const uint8_t *macaddr)
1473 {
1474 	aggr_grp_t *grp = arg;
1475 	int rc;
1476 
1477 	AGGR_LACP_LOCK(grp);
1478 	rw_enter(&grp->lg_lock, RW_WRITER);
1479 	rc = aggr_grp_modify(0, grp, AGGR_MODIFY_MAC, 0, B_TRUE, macaddr,
1480 	    0, 0);
1481 	rw_exit(&grp->lg_lock);
1482 	AGGR_LACP_UNLOCK(grp);
1483 
1484 	return (rc);
1485 }
1486 
1487 /*
1488  * Initialize the capabilities that are advertised for the group
1489  * according to the capabilities of the constituent ports.
1490  */
1491 static void
1492 aggr_grp_capab_set(aggr_grp_t *grp)
1493 {
1494 	uint32_t cksum;
1495 	aggr_port_t *port;
1496 
1497 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
1498 	ASSERT(grp->lg_ports != NULL);
1499 
1500 	grp->lg_hcksum_txflags = (uint32_t)-1;
1501 	grp->lg_gldv3_polling = B_TRUE;
1502 	grp->lg_zcopy = B_TRUE;
1503 	grp->lg_vlan = B_TRUE;
1504 
1505 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1506 		if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &cksum))
1507 			cksum = 0;
1508 		grp->lg_hcksum_txflags &= cksum;
1509 
1510 		grp->lg_vlan &=
1511 		    !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL);
1512 
1513 		grp->lg_zcopy &=
1514 		    !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL);
1515 
1516 		grp->lg_gldv3_polling &=
1517 		    mac_capab_get(port->lp_mh, MAC_CAPAB_POLL, NULL);
1518 	}
1519 }
1520 
1521 /*
1522  * Checks whether the capabilities of the port being added are compatible
1523  * with the current capabilities of the aggregation.
1524  */
1525 static boolean_t
1526 aggr_grp_capab_check(aggr_grp_t *grp, aggr_port_t *port)
1527 {
1528 	uint32_t hcksum_txflags;
1529 
1530 	ASSERT(grp->lg_ports != NULL);
1531 
1532 	if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL)) &
1533 	    grp->lg_vlan) != grp->lg_vlan) {
1534 		return (B_FALSE);
1535 	}
1536 
1537 	if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL)) &
1538 	    grp->lg_zcopy) != grp->lg_zcopy) {
1539 		return (B_FALSE);
1540 	}
1541 
1542 	if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &hcksum_txflags)) {
1543 		if (grp->lg_hcksum_txflags != 0)
1544 			return (B_FALSE);
1545 	} else if ((hcksum_txflags & grp->lg_hcksum_txflags) !=
1546 	    grp->lg_hcksum_txflags) {
1547 		return (B_FALSE);
1548 	}
1549 
1550 	if (mac_capab_get(port->lp_mh, MAC_CAPAB_POLL, NULL) !=
1551 	    grp->lg_gldv3_polling) {
1552 		return (B_FALSE);
1553 	}
1554 
1555 	return (B_TRUE);
1556 }
1557 
1558 /*
1559  * Returns the maximum SDU according to the SDU of the constituent ports.
1560  */
1561 static uint_t
1562 aggr_grp_max_sdu(aggr_grp_t *grp)
1563 {
1564 	uint_t max_sdu = (uint_t)-1;
1565 	aggr_port_t *port;
1566 
1567 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
1568 	ASSERT(grp->lg_ports != NULL);
1569 
1570 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1571 		uint_t port_sdu_max;
1572 
1573 		mac_sdu_get(port->lp_mh, NULL, &port_sdu_max);
1574 		if (max_sdu > port_sdu_max)
1575 			max_sdu = port_sdu_max;
1576 	}
1577 
1578 	return (max_sdu);
1579 }
1580 
1581 /*
1582  * Checks if the maximum SDU of the specified port is compatible
1583  * with the maximum SDU of the specified aggregation group, returns
1584  * B_TRUE if it is, B_FALSE otherwise.
1585  */
1586 static boolean_t
1587 aggr_grp_sdu_check(aggr_grp_t *grp, aggr_port_t *port)
1588 {
1589 	uint_t port_sdu_max;
1590 
1591 	mac_sdu_get(port->lp_mh, NULL, &port_sdu_max);
1592 	return (port_sdu_max >= grp->lg_max_sdu);
1593 }
1594 
1595 /*
1596  * Returns the maximum margin according to the margin of the constituent ports.
1597  */
1598 static uint32_t
1599 aggr_grp_max_margin(aggr_grp_t *grp)
1600 {
1601 	uint32_t margin = UINT32_MAX;
1602 	aggr_port_t *port;
1603 
1604 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
1605 	ASSERT(grp->lg_ports != NULL);
1606 
1607 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1608 		if (margin > port->lp_margin)
1609 			margin = port->lp_margin;
1610 	}
1611 
1612 	grp->lg_margin = margin;
1613 	return (margin);
1614 }
1615 
1616 /*
1617  * Checks if the maximum margin of the specified port is compatible
1618  * with the maximum margin of the specified aggregation group, returns
1619  * B_TRUE if it is, B_FALSE otherwise.
1620  */
1621 static boolean_t
1622 aggr_grp_margin_check(aggr_grp_t *grp, aggr_port_t *port)
1623 {
1624 	if (port->lp_margin >= grp->lg_margin)
1625 		return (B_TRUE);
1626 
1627 	/*
1628 	 * See whether the current margin value is allowed to be changed to
1629 	 * the new value.
1630 	 */
1631 	if (!mac_margin_update(grp->lg_mh, port->lp_margin))
1632 		return (B_FALSE);
1633 
1634 	grp->lg_margin = port->lp_margin;
1635 	return (B_TRUE);
1636 }
1637