xref: /titanic_52/usr/src/uts/common/io/aggr/aggr_grp.c (revision c2b6e926ea57d0ba055f91471cfc9772c7fbacd0)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups.
30  *
31  * An instance of the structure aggr_grp_t is allocated for each
32  * link aggregation group. When created, aggr_grp_t objects are
33  * entered into the aggr_grp_hash hash table maintained by the modhash
34  * module. The hash key is the port number associated with the link
35  * aggregation group. The port number associated with a group corresponds
36  * the key associated with the group.
37  *
38  * A set of MAC ports are associated with each association group.
39  */
40 
41 #include <sys/types.h>
42 #include <sys/sysmacros.h>
43 #include <sys/conf.h>
44 #include <sys/cmn_err.h>
45 #include <sys/list.h>
46 #include <sys/ksynch.h>
47 #include <sys/kmem.h>
48 #include <sys/stream.h>
49 #include <sys/modctl.h>
50 #include <sys/ddi.h>
51 #include <sys/sunddi.h>
52 #include <sys/atomic.h>
53 #include <sys/stat.h>
54 #include <sys/modhash.h>
55 #include <sys/strsun.h>
56 #include <sys/dlpi.h>
57 
58 #include <sys/aggr.h>
59 #include <sys/aggr_impl.h>
60 
61 static int aggr_m_start(void *);
62 static void aggr_m_stop(void *);
63 static int aggr_m_promisc(void *, boolean_t);
64 static int aggr_m_multicst(void *, boolean_t, const uint8_t *);
65 static int aggr_m_unicst(void *, const uint8_t *);
66 static int aggr_m_stat(void *, uint_t, uint64_t *);
67 static void aggr_m_resources(void *);
68 static void aggr_m_ioctl(void *, queue_t *, mblk_t *);
69 static boolean_t aggr_m_capab_get(void *, mac_capab_t, void *);
70 
71 static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, const char *);
72 static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *,
73     boolean_t *);
74 static void aggr_grp_capab_set(aggr_grp_t *);
75 static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *);
76 static uint_t aggr_grp_max_sdu(aggr_grp_t *);
77 static boolean_t aggr_grp_sdu_check(aggr_grp_t *, aggr_port_t *);
78 
79 static kmem_cache_t	*aggr_grp_cache;
80 static mod_hash_t	*aggr_grp_hash;
81 static krwlock_t	aggr_grp_lock;
82 static uint_t		aggr_grp_cnt;
83 
84 #define	GRP_HASHSZ		64
85 #define	GRP_HASH_KEY(key)	((mod_hash_key_t)(uintptr_t)key)
86 
87 static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0};
88 
89 /* used by grp_info_walker */
90 typedef struct aggr_grp_info_state {
91 	uint32_t	ls_group_key;
92 	boolean_t	ls_group_found;
93 	aggr_grp_info_new_grp_fn_t ls_new_grp_fn;
94 	aggr_grp_info_new_port_fn_t ls_new_port_fn;
95 	void		*ls_fn_arg;
96 	int		ls_rc;
97 } aggr_grp_info_state_t;
98 
99 #define	AGGR_M_CALLBACK_FLAGS	(MC_RESOURCES | MC_IOCTL | MC_GETCAPAB)
100 
101 static mac_callbacks_t aggr_m_callbacks = {
102 	AGGR_M_CALLBACK_FLAGS,
103 	aggr_m_stat,
104 	aggr_m_start,
105 	aggr_m_stop,
106 	aggr_m_promisc,
107 	aggr_m_multicst,
108 	aggr_m_unicst,
109 	aggr_m_tx,
110 	aggr_m_resources,
111 	aggr_m_ioctl,
112 	aggr_m_capab_get
113 };
114 
115 /*ARGSUSED*/
116 static int
117 aggr_grp_constructor(void *buf, void *arg, int kmflag)
118 {
119 	aggr_grp_t *grp = buf;
120 
121 	bzero(grp, sizeof (*grp));
122 	rw_init(&grp->lg_lock, NULL, RW_DRIVER, NULL);
123 	mutex_init(&grp->aggr.gl_lock, NULL, MUTEX_DEFAULT, NULL);
124 
125 	grp->lg_link_state = LINK_STATE_UNKNOWN;
126 
127 	return (0);
128 }
129 
130 /*ARGSUSED*/
131 static void
132 aggr_grp_destructor(void *buf, void *arg)
133 {
134 	aggr_grp_t *grp = buf;
135 
136 	if (grp->lg_tx_ports != NULL) {
137 		kmem_free(grp->lg_tx_ports,
138 		    grp->lg_tx_ports_size * sizeof (aggr_port_t *));
139 	}
140 
141 	mutex_destroy(&grp->aggr.gl_lock);
142 	rw_destroy(&grp->lg_lock);
143 }
144 
145 void
146 aggr_grp_init(void)
147 {
148 	aggr_grp_cache = kmem_cache_create("aggr_grp_cache",
149 	    sizeof (aggr_grp_t), 0, aggr_grp_constructor,
150 	    aggr_grp_destructor, NULL, NULL, NULL, 0);
151 
152 	aggr_grp_hash = mod_hash_create_idhash("aggr_grp_hash",
153 	    GRP_HASHSZ, mod_hash_null_valdtor);
154 	rw_init(&aggr_grp_lock, NULL, RW_DEFAULT, NULL);
155 	aggr_grp_cnt = 0;
156 }
157 
158 void
159 aggr_grp_fini(void)
160 {
161 	rw_destroy(&aggr_grp_lock);
162 	mod_hash_destroy_idhash(aggr_grp_hash);
163 	kmem_cache_destroy(aggr_grp_cache);
164 }
165 
166 uint_t
167 aggr_grp_count(void)
168 {
169 	uint_t	count;
170 
171 	rw_enter(&aggr_grp_lock, RW_READER);
172 	count = aggr_grp_cnt;
173 	rw_exit(&aggr_grp_lock);
174 	return (count);
175 }
176 
177 /*
178  * Attach a port to a link aggregation group.
179  *
180  * A port is attached to a link aggregation group once its speed
181  * and link state have been verified.
182  *
183  * Returns B_TRUE if the group link state or speed has changed. If
184  * it's the case, the caller must notify the MAC layer via a call
185  * to mac_link().
186  */
187 boolean_t
188 aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port)
189 {
190 	boolean_t link_state_changed = B_FALSE;
191 
192 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
193 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
194 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
195 
196 	if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
197 		return (B_FALSE);
198 
199 	/*
200 	 * Validate the MAC port link speed and update the group
201 	 * link speed if needed.
202 	 */
203 	if (port->lp_ifspeed == 0 ||
204 	    port->lp_link_state != LINK_STATE_UP ||
205 	    port->lp_link_duplex != LINK_DUPLEX_FULL) {
206 		/*
207 		 * Can't attach a MAC port with unknown link speed,
208 		 * down link, or not in full duplex mode.
209 		 */
210 		return (B_FALSE);
211 	}
212 
213 	if (grp->lg_ifspeed == 0) {
214 		/*
215 		 * The group inherits the speed of the first link being
216 		 * attached.
217 		 */
218 		grp->lg_ifspeed = port->lp_ifspeed;
219 		link_state_changed = B_TRUE;
220 	} else if (grp->lg_ifspeed != port->lp_ifspeed) {
221 		/*
222 		 * The link speed of the MAC port must be the same as
223 		 * the group link speed, as per 802.3ad. Since it is
224 		 * not, the attach is cancelled.
225 		 */
226 		return (B_FALSE);
227 	}
228 
229 	grp->lg_nattached_ports++;
230 
231 	/*
232 	 * Update the group link state.
233 	 */
234 	if (grp->lg_link_state != LINK_STATE_UP) {
235 		grp->lg_link_state = LINK_STATE_UP;
236 		grp->lg_link_duplex = LINK_DUPLEX_FULL;
237 		link_state_changed = B_TRUE;
238 	}
239 
240 	aggr_grp_multicst_port(port, B_TRUE);
241 
242 	/*
243 	 * Update port's state.
244 	 */
245 	port->lp_state = AGGR_PORT_STATE_ATTACHED;
246 
247 	/*
248 	 * Set port's receive callback
249 	 */
250 	port->lp_mrh = mac_rx_add(port->lp_mh, aggr_recv_cb, (void *)port);
251 
252 	/*
253 	 * If LACP is OFF, the port can be used to send data as soon
254 	 * as its link is up and verified to be compatible with the
255 	 * aggregation.
256 	 *
257 	 * If LACP is active or passive, notify the LACP subsystem, which
258 	 * will enable sending on the port following the LACP protocol.
259 	 */
260 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
261 		aggr_send_port_enable(port);
262 	else
263 		aggr_lacp_port_attached(port);
264 
265 	return (link_state_changed);
266 }
267 
268 boolean_t
269 aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port)
270 {
271 	boolean_t link_state_changed = B_FALSE;
272 
273 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
274 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
275 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
276 
277 	/* update state */
278 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
279 		return (B_FALSE);
280 
281 	mac_rx_remove(port->lp_mh, port->lp_mrh, B_FALSE);
282 	port->lp_state = AGGR_PORT_STATE_STANDBY;
283 
284 	aggr_grp_multicst_port(port, B_FALSE);
285 
286 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
287 		aggr_send_port_disable(port);
288 	else
289 		aggr_lacp_port_detached(port);
290 
291 	grp->lg_nattached_ports--;
292 	if (grp->lg_nattached_ports == 0) {
293 		/* the last attached MAC port of the group is being detached */
294 		grp->lg_ifspeed = 0;
295 		grp->lg_link_state = LINK_STATE_DOWN;
296 		grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
297 		link_state_changed = B_TRUE;
298 	}
299 
300 	return (link_state_changed);
301 }
302 
303 /*
304  * Update the MAC addresses of the constituent ports of the specified
305  * group. This function is invoked:
306  * - after creating a new aggregation group.
307  * - after adding new ports to an aggregation group.
308  * - after removing a port from a group when the MAC address of
309  *   that port was used for the MAC address of the group.
310  * - after the MAC address of a port changed when the MAC address
311  *   of that port was used for the MAC address of the group.
312  *
313  * Return true if the link state of the aggregation changed, for example
314  * as a result of a failure changing the MAC address of one of the
315  * constituent ports.
316  */
317 boolean_t
318 aggr_grp_update_ports_mac(aggr_grp_t *grp)
319 {
320 	aggr_port_t *cport;
321 	boolean_t link_state_changed = B_FALSE;
322 
323 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
324 
325 	if (grp->lg_closing)
326 		return (link_state_changed);
327 
328 	for (cport = grp->lg_ports; cport != NULL;
329 	    cport = cport->lp_next) {
330 		rw_enter(&cport->lp_lock, RW_WRITER);
331 		if (aggr_port_unicst(cport, grp->lg_addr) != 0) {
332 			if (aggr_grp_detach_port(grp, cport))
333 				link_state_changed = B_TRUE;
334 		} else {
335 			/*
336 			 * If a port was detached because of a previous
337 			 * failure changing the MAC address, the port is
338 			 * reattached when it successfully changes the MAC
339 			 * address now, and this might cause the link state
340 			 * of the aggregation to change.
341 			 */
342 			if (aggr_grp_attach_port(grp, cport))
343 				link_state_changed = B_TRUE;
344 		}
345 		rw_exit(&cport->lp_lock);
346 	}
347 	return (link_state_changed);
348 }
349 
350 /*
351  * Invoked when the MAC address of a port has changed. If the port's
352  * MAC address was used for the group MAC address, set mac_addr_changedp
353  * to B_TRUE to indicate to the caller that it should send a MAC_NOTE_UNICST
354  * notification. If the link state changes due to detach/attach of
355  * the constituent port, set link_state_changedp to B_TRUE to indicate
356  * to the caller that it should send a MAC_NOTE_LINK notification. In both
357  * cases, it is the responsibility of the caller to invoke notification
358  * functions after releasing the the port lock.
359  */
360 void
361 aggr_grp_port_mac_changed(aggr_grp_t *grp, aggr_port_t *port,
362     boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
363 {
364 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
365 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
366 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
367 	ASSERT(mac_addr_changedp != NULL);
368 	ASSERT(link_state_changedp != NULL);
369 
370 	*mac_addr_changedp = B_FALSE;
371 	*link_state_changedp = B_FALSE;
372 
373 	if (grp->lg_addr_fixed) {
374 		/*
375 		 * The group is using a fixed MAC address or an automatic
376 		 * MAC address has not been set.
377 		 */
378 		return;
379 	}
380 
381 	if (grp->lg_mac_addr_port == port) {
382 		/*
383 		 * The MAC address of the port was assigned to the group
384 		 * MAC address. Update the group MAC address.
385 		 */
386 		bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
387 		*mac_addr_changedp = B_TRUE;
388 	} else {
389 		/*
390 		 * Update the actual port MAC address to the MAC address
391 		 * of the group.
392 		 */
393 		if (aggr_port_unicst(port, grp->lg_addr) != 0) {
394 			*link_state_changedp = aggr_grp_detach_port(grp, port);
395 		} else {
396 			/*
397 			 * If a port was detached because of a previous
398 			 * failure changing the MAC address, the port is
399 			 * reattached when it successfully changes the MAC
400 			 * address now, and this might cause the link state
401 			 * of the aggregation to change.
402 			 */
403 			*link_state_changedp = aggr_grp_attach_port(grp, port);
404 		}
405 	}
406 }
407 
408 /*
409  * Add a port to a link aggregation group.
410  */
411 static int
412 aggr_grp_add_port(aggr_grp_t *grp, const char *name, aggr_port_t **pp)
413 {
414 	aggr_port_t *port, **cport;
415 	int err;
416 
417 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
418 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
419 
420 	/* create new port */
421 	err = aggr_port_create(name, &port);
422 	if (err != 0)
423 		return (err);
424 
425 	rw_enter(&port->lp_lock, RW_WRITER);
426 
427 	/* add port to list of group constituent ports */
428 	cport = &grp->lg_ports;
429 	while (*cport != NULL)
430 		cport = &((*cport)->lp_next);
431 	*cport = port;
432 
433 	/*
434 	 * Back reference to the group it is member of. A port always
435 	 * holds a reference to its group to ensure that the back
436 	 * reference is always valid.
437 	 */
438 	port->lp_grp = grp;
439 	AGGR_GRP_REFHOLD(grp);
440 	grp->lg_nports++;
441 
442 	aggr_lacp_init_port(port);
443 
444 	/*
445 	 * Initialize the callback functions for this port. Note that this
446 	 * can only be done after the lp_grp field is set.
447 	 */
448 	aggr_port_init_callbacks(port);
449 
450 	rw_exit(&port->lp_lock);
451 
452 	if (pp != NULL)
453 		*pp = port;
454 
455 	return (0);
456 }
457 
458 /*
459  * Add one or more ports to an existing link aggregation group.
460  */
461 int
462 aggr_grp_add_ports(uint32_t key, uint_t nports, laioc_port_t *ports)
463 {
464 	int rc, i, nadded = 0;
465 	aggr_grp_t *grp = NULL;
466 	aggr_port_t *port;
467 	boolean_t link_state_changed = B_FALSE;
468 
469 	/* get group corresponding to key */
470 	rw_enter(&aggr_grp_lock, RW_READER);
471 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key),
472 	    (mod_hash_val_t *)&grp) != 0) {
473 		rw_exit(&aggr_grp_lock);
474 		return (ENOENT);
475 	}
476 	AGGR_GRP_REFHOLD(grp);
477 	rw_exit(&aggr_grp_lock);
478 
479 	AGGR_LACP_LOCK(grp);
480 	rw_enter(&grp->lg_lock, RW_WRITER);
481 
482 	/* add the specified ports to group */
483 	for (i = 0; i < nports; i++) {
484 		/* add port to group */
485 		if ((rc = aggr_grp_add_port(grp, ports[i].lp_devname, &port)) !=
486 		    0) {
487 			goto bail;
488 		}
489 		ASSERT(port != NULL);
490 		nadded++;
491 
492 		/* check capabilities */
493 		if (!aggr_grp_capab_check(grp, port) ||
494 		    !aggr_grp_sdu_check(grp, port)) {
495 			rc = ENOTSUP;
496 			goto bail;
497 		}
498 
499 		/* start port if group has already been started */
500 		if (grp->lg_started) {
501 			rw_enter(&port->lp_lock, RW_WRITER);
502 			rc = aggr_port_start(port);
503 			if (rc != 0) {
504 				rw_exit(&port->lp_lock);
505 				goto bail;
506 			}
507 
508 			/* set port promiscuous mode */
509 			rc = aggr_port_promisc(port, grp->lg_promisc);
510 			if (rc != 0) {
511 				rw_exit(&port->lp_lock);
512 				goto bail;
513 			}
514 			rw_exit(&port->lp_lock);
515 		}
516 
517 		/*
518 		 * Attach each port if necessary.
519 		 */
520 		if (aggr_port_notify_link(grp, port, B_FALSE))
521 			link_state_changed = B_TRUE;
522 	}
523 
524 	/* update the MAC address of the constituent ports */
525 	if (aggr_grp_update_ports_mac(grp))
526 		link_state_changed = B_TRUE;
527 
528 	if (link_state_changed)
529 		mac_link_update(grp->lg_mh, grp->lg_link_state);
530 
531 bail:
532 	if (rc != 0) {
533 		/* stop and remove ports that have been added */
534 		for (i = 0; i < nadded && !grp->lg_closing; i++) {
535 			port = aggr_grp_port_lookup(grp, ports[i].lp_devname);
536 			ASSERT(port != NULL);
537 			if (grp->lg_started) {
538 				rw_enter(&port->lp_lock, RW_WRITER);
539 				aggr_port_stop(port);
540 				rw_exit(&port->lp_lock);
541 			}
542 			(void) aggr_grp_rem_port(grp, port, NULL, NULL);
543 		}
544 	}
545 
546 	rw_exit(&grp->lg_lock);
547 	AGGR_LACP_UNLOCK(grp);
548 	if (rc == 0 && !grp->lg_closing)
549 		mac_resource_update(grp->lg_mh);
550 	AGGR_GRP_REFRELE(grp);
551 	return (rc);
552 }
553 
554 /*
555  * Update properties of an existing link aggregation group.
556  */
557 int
558 aggr_grp_modify(uint32_t key, aggr_grp_t *grp_arg, uint8_t update_mask,
559     uint32_t policy, boolean_t mac_fixed, const uchar_t *mac_addr,
560     aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer)
561 {
562 	int rc = 0;
563 	aggr_grp_t *grp = NULL;
564 	boolean_t mac_addr_changed = B_FALSE;
565 	boolean_t link_state_changed = B_FALSE;
566 
567 	if (grp_arg == NULL) {
568 		/* get group corresponding to key */
569 		rw_enter(&aggr_grp_lock, RW_READER);
570 		if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key),
571 		    (mod_hash_val_t *)&grp) != 0) {
572 			rc = ENOENT;
573 			goto bail;
574 		}
575 		AGGR_LACP_LOCK(grp);
576 		rw_enter(&grp->lg_lock, RW_WRITER);
577 	} else {
578 		grp = grp_arg;
579 		ASSERT(AGGR_LACP_LOCK_HELD(grp));
580 		ASSERT(RW_WRITE_HELD(&grp->lg_lock));
581 	}
582 
583 	ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock));
584 	AGGR_GRP_REFHOLD(grp);
585 
586 	/* validate fixed address if specified */
587 	if ((update_mask & AGGR_MODIFY_MAC) && mac_fixed &&
588 	    ((bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) ||
589 	    (mac_addr[0] & 0x01))) {
590 		rc = EINVAL;
591 		goto bail;
592 	}
593 
594 	/* update policy if requested */
595 	if (update_mask & AGGR_MODIFY_POLICY)
596 		aggr_send_update_policy(grp, policy);
597 
598 	/* update unicast MAC address if requested */
599 	if (update_mask & AGGR_MODIFY_MAC) {
600 		if (mac_fixed) {
601 			/* user-supplied MAC address */
602 			grp->lg_mac_addr_port = NULL;
603 			if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) != 0) {
604 				bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
605 				mac_addr_changed = B_TRUE;
606 			}
607 		} else if (grp->lg_addr_fixed) {
608 			/* switch from user-supplied to automatic */
609 			aggr_port_t *port = grp->lg_ports;
610 
611 			rw_enter(&port->lp_lock, RW_WRITER);
612 			bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
613 			grp->lg_mac_addr_port = port;
614 			mac_addr_changed = B_TRUE;
615 			rw_exit(&port->lp_lock);
616 		}
617 		grp->lg_addr_fixed = mac_fixed;
618 	}
619 
620 	if (mac_addr_changed)
621 		link_state_changed = aggr_grp_update_ports_mac(grp);
622 
623 	if (update_mask & AGGR_MODIFY_LACP_MODE)
624 		aggr_lacp_update_mode(grp, lacp_mode);
625 
626 	if ((update_mask & AGGR_MODIFY_LACP_TIMER) && !grp->lg_closing)
627 		aggr_lacp_update_timer(grp, lacp_timer);
628 
629 bail:
630 	if (grp != NULL && !grp->lg_closing) {
631 		/*
632 		 * If grp_arg is non-NULL, this function is called from
633 		 * mac_unicst_set(), and the MAC_NOTE_UNICST notification
634 		 * will be sent there.
635 		 */
636 		if ((grp_arg == NULL) && mac_addr_changed)
637 			mac_unicst_update(grp->lg_mh, grp->lg_addr);
638 
639 		if (link_state_changed)
640 			mac_link_update(grp->lg_mh, grp->lg_link_state);
641 
642 	}
643 
644 	if (grp_arg == NULL) {
645 		if (grp != NULL) {
646 			rw_exit(&grp->lg_lock);
647 			AGGR_LACP_UNLOCK(grp);
648 		}
649 		rw_exit(&aggr_grp_lock);
650 	}
651 
652 	if (grp != NULL)
653 		AGGR_GRP_REFRELE(grp);
654 
655 	return (rc);
656 }
657 
658 /*
659  * Create a new link aggregation group upon request from administrator.
660  * Returns 0 on success, an errno on failure.
661  */
662 int
663 aggr_grp_create(uint32_t key, uint_t nports, laioc_port_t *ports,
664     uint32_t policy, boolean_t mac_fixed, uchar_t *mac_addr,
665     aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer)
666 {
667 	aggr_grp_t *grp = NULL;
668 	aggr_port_t *port;
669 	mac_register_t *mac;
670 	boolean_t link_state_changed;
671 	int err;
672 	int i;
673 
674 	/* need at least one port */
675 	if (nports == 0)
676 		return (EINVAL);
677 
678 	rw_enter(&aggr_grp_lock, RW_WRITER);
679 
680 	/* does a group with the same key already exist? */
681 	err = mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key),
682 	    (mod_hash_val_t *)&grp);
683 	if (err == 0) {
684 		rw_exit(&aggr_grp_lock);
685 		return (EEXIST);
686 	}
687 
688 	grp = kmem_cache_alloc(aggr_grp_cache, KM_SLEEP);
689 
690 	AGGR_LACP_LOCK(grp);
691 	rw_enter(&grp->lg_lock, RW_WRITER);
692 
693 	grp->lg_refs = 1;
694 	grp->lg_closing = B_FALSE;
695 	grp->lg_key = key;
696 
697 	grp->lg_ifspeed = 0;
698 	grp->lg_link_state = LINK_STATE_UNKNOWN;
699 	grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
700 	grp->lg_started = B_FALSE;
701 	grp->lg_promisc = B_FALSE;
702 	aggr_lacp_init_grp(grp);
703 
704 	/* add MAC ports to group */
705 	grp->lg_ports = NULL;
706 	grp->lg_nports = 0;
707 	grp->lg_nattached_ports = 0;
708 	grp->lg_ntx_ports = 0;
709 
710 	for (i = 0; i < nports; i++) {
711 		err = aggr_grp_add_port(grp, ports[i].lp_devname, NULL);
712 		if (err != 0)
713 			goto bail;
714 	}
715 
716 	/*
717 	 * If no explicit MAC address was specified by the administrator,
718 	 * set it to the MAC address of the first port.
719 	 */
720 	grp->lg_addr_fixed = mac_fixed;
721 	if (grp->lg_addr_fixed) {
722 		/* validate specified address */
723 		if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) {
724 			err = EINVAL;
725 			goto bail;
726 		}
727 		bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
728 	} else {
729 		bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
730 		grp->lg_mac_addr_port = grp->lg_ports;
731 	}
732 
733 	/*
734 	 * Update the MAC address of the constituent ports.
735 	 * None of the port is attached at this time, the link state of the
736 	 * aggregation will not change.
737 	 */
738 	link_state_changed = aggr_grp_update_ports_mac(grp);
739 	ASSERT(!link_state_changed);
740 
741 	/* update outbound load balancing policy */
742 	aggr_send_update_policy(grp, policy);
743 
744 	/* set the initial group capabilities */
745 	aggr_grp_capab_set(grp);
746 
747 	if ((mac = mac_alloc(MAC_VERSION)) == NULL)
748 		goto bail;
749 	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
750 	mac->m_driver = grp;
751 	mac->m_dip = aggr_dip;
752 	mac->m_instance = key;
753 	mac->m_src_addr = grp->lg_addr;
754 	mac->m_callbacks = &aggr_m_callbacks;
755 	mac->m_min_sdu = 0;
756 	mac->m_max_sdu = grp->lg_max_sdu = aggr_grp_max_sdu(grp);
757 	err = mac_register(mac, &grp->lg_mh);
758 	mac_free(mac);
759 	if (err != 0)
760 		goto bail;
761 
762 	/* set LACP mode */
763 	aggr_lacp_set_mode(grp, lacp_mode, lacp_timer);
764 
765 	/*
766 	 * Attach each port if necessary.
767 	 */
768 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
769 		if (aggr_port_notify_link(grp, port, B_FALSE))
770 			link_state_changed = B_TRUE;
771 	}
772 
773 	if (link_state_changed)
774 		mac_link_update(grp->lg_mh, grp->lg_link_state);
775 
776 	/* add new group to hash table */
777 	err = mod_hash_insert(aggr_grp_hash, GRP_HASH_KEY(key),
778 	    (mod_hash_val_t)grp);
779 	ASSERT(err == 0);
780 	aggr_grp_cnt++;
781 
782 	rw_exit(&grp->lg_lock);
783 	AGGR_LACP_UNLOCK(grp);
784 	rw_exit(&aggr_grp_lock);
785 	return (0);
786 
787 bail:
788 	if (grp != NULL) {
789 		aggr_port_t *cport;
790 
791 		grp->lg_closing = B_TRUE;
792 
793 		port = grp->lg_ports;
794 		while (port != NULL) {
795 			cport = port->lp_next;
796 			aggr_port_delete(port);
797 			port = cport;
798 		}
799 
800 		rw_exit(&grp->lg_lock);
801 		AGGR_LACP_UNLOCK(grp);
802 
803 		kmem_cache_free(aggr_grp_cache, grp);
804 	}
805 
806 	rw_exit(&aggr_grp_lock);
807 	return (err);
808 }
809 
810 /*
811  * Return a pointer to the member of a group with specified device name
812  * and port number.
813  */
814 static aggr_port_t *
815 aggr_grp_port_lookup(aggr_grp_t *grp, const char *devname)
816 {
817 	aggr_port_t *port;
818 
819 	ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock));
820 
821 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
822 		if (strcmp(port->lp_devname, devname) == 0)
823 			break;
824 	}
825 
826 	return (port);
827 }
828 
829 /*
830  * Stop, detach and remove a port from a link aggregation group.
831  */
832 static int
833 aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port,
834     boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
835 {
836 	int rc = 0;
837 	aggr_port_t **pport;
838 	boolean_t mac_addr_changed = B_FALSE;
839 	boolean_t link_state_changed = B_FALSE;
840 	uint64_t val;
841 	uint_t i;
842 	uint_t stat;
843 
844 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
845 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
846 	ASSERT(grp->lg_nports > 1);
847 	ASSERT(!grp->lg_closing);
848 
849 	/* unlink port */
850 	for (pport = &grp->lg_ports; *pport != port;
851 	    pport = &(*pport)->lp_next) {
852 		if (*pport == NULL) {
853 			rc = ENOENT;
854 			goto done;
855 		}
856 	}
857 	*pport = port->lp_next;
858 
859 	atomic_add_32(&port->lp_closing, 1);
860 
861 	rw_enter(&port->lp_lock, RW_WRITER);
862 
863 	/*
864 	 * If the MAC address of the port being removed was assigned
865 	 * to the group, update the group MAC address
866 	 * using the MAC address of a different port.
867 	 */
868 	if (!grp->lg_addr_fixed && grp->lg_mac_addr_port == port) {
869 		/*
870 		 * Set the MAC address of the group to the
871 		 * MAC address of its first port.
872 		 */
873 		bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
874 		grp->lg_mac_addr_port = grp->lg_ports;
875 		mac_addr_changed = B_TRUE;
876 	}
877 
878 	link_state_changed = aggr_grp_detach_port(grp, port);
879 
880 	/*
881 	 * Add the counter statistics of the ports while it was aggregated
882 	 * to the group's residual statistics.  This is done by obtaining
883 	 * the current counter from the underlying MAC then subtracting the
884 	 * value of the counter at the moment it was added to the
885 	 * aggregation.
886 	 */
887 	for (i = 0; i < MAC_NSTAT && !grp->lg_closing; i++) {
888 		stat = i + MAC_STAT_MIN;
889 		if (!MAC_STAT_ISACOUNTER(stat))
890 			continue;
891 		val = aggr_port_stat(port, stat);
892 		val -= port->lp_stat[i];
893 		grp->lg_stat[i] += val;
894 	}
895 	for (i = 0; i < ETHER_NSTAT && !grp->lg_closing; i++) {
896 		stat = i + MACTYPE_STAT_MIN;
897 		if (!ETHER_STAT_ISACOUNTER(stat))
898 			continue;
899 		val = aggr_port_stat(port, stat);
900 		val -= port->lp_ether_stat[i];
901 		grp->lg_ether_stat[i] += val;
902 	}
903 
904 	grp->lg_nports--;
905 
906 	rw_exit(&port->lp_lock);
907 
908 	aggr_port_delete(port);
909 
910 	/*
911 	 * If the group MAC address has changed, update the MAC address of
912 	 * the remaining consistuent ports according to the new MAC
913 	 * address of the group.
914 	 */
915 	if (mac_addr_changed && aggr_grp_update_ports_mac(grp))
916 		link_state_changed = B_TRUE;
917 
918 done:
919 	if (mac_addr_changedp != NULL)
920 		*mac_addr_changedp = mac_addr_changed;
921 	if (link_state_changedp != NULL)
922 		*link_state_changedp = link_state_changed;
923 
924 	return (rc);
925 }
926 
927 /*
928  * Remove one or more ports from an existing link aggregation group.
929  */
930 int
931 aggr_grp_rem_ports(uint32_t key, uint_t nports, laioc_port_t *ports)
932 {
933 	int rc = 0, i;
934 	aggr_grp_t *grp = NULL;
935 	aggr_port_t *port;
936 	boolean_t mac_addr_update = B_FALSE, mac_addr_changed;
937 	boolean_t link_state_update = B_FALSE, link_state_changed;
938 
939 	/* get group corresponding to key */
940 	rw_enter(&aggr_grp_lock, RW_READER);
941 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key),
942 	    (mod_hash_val_t *)&grp) != 0) {
943 		rw_exit(&aggr_grp_lock);
944 		return (ENOENT);
945 	}
946 	AGGR_GRP_REFHOLD(grp);
947 	rw_exit(&aggr_grp_lock);
948 
949 	AGGR_LACP_LOCK(grp);
950 	rw_enter(&grp->lg_lock, RW_WRITER);
951 
952 	/* we need to keep at least one port per group */
953 	if (nports >= grp->lg_nports) {
954 		rc = EINVAL;
955 		goto bail;
956 	}
957 
958 	/* first verify that all the groups are valid */
959 	for (i = 0; i < nports; i++) {
960 		if (aggr_grp_port_lookup(grp, ports[i].lp_devname) == NULL) {
961 			/* port not found */
962 			rc = ENOENT;
963 			goto bail;
964 		}
965 	}
966 
967 	/* remove the specified ports from group */
968 	for (i = 0; i < nports && !grp->lg_closing; i++) {
969 		/* lookup port */
970 		port = aggr_grp_port_lookup(grp, ports[i].lp_devname);
971 		ASSERT(port != NULL);
972 
973 		/* stop port if group has already been started */
974 		if (grp->lg_started) {
975 			rw_enter(&port->lp_lock, RW_WRITER);
976 			aggr_port_stop(port);
977 			rw_exit(&port->lp_lock);
978 		}
979 
980 		/* remove port from group */
981 		rc = aggr_grp_rem_port(grp, port, &mac_addr_changed,
982 		    &link_state_changed);
983 		ASSERT(rc == 0);
984 		mac_addr_update = mac_addr_update || mac_addr_changed;
985 		link_state_update = link_state_update || link_state_changed;
986 	}
987 
988 bail:
989 	rw_exit(&grp->lg_lock);
990 	AGGR_LACP_UNLOCK(grp);
991 	if (!grp->lg_closing) {
992 		if (mac_addr_update)
993 			mac_unicst_update(grp->lg_mh, grp->lg_addr);
994 		if (link_state_update)
995 			mac_link_update(grp->lg_mh, grp->lg_link_state);
996 		if (rc == 0)
997 			mac_resource_update(grp->lg_mh);
998 	}
999 	AGGR_GRP_REFRELE(grp);
1000 
1001 	return (rc);
1002 }
1003 
1004 int
1005 aggr_grp_delete(uint32_t key)
1006 {
1007 	aggr_grp_t *grp = NULL;
1008 	aggr_port_t *port, *cport;
1009 	mod_hash_val_t val;
1010 	int err;
1011 
1012 	rw_enter(&aggr_grp_lock, RW_WRITER);
1013 
1014 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key),
1015 	    (mod_hash_val_t *)&grp) != 0) {
1016 		rw_exit(&aggr_grp_lock);
1017 		return (ENOENT);
1018 	}
1019 
1020 	AGGR_LACP_LOCK(grp);
1021 	rw_enter(&grp->lg_lock, RW_WRITER);
1022 
1023 	grp->lg_closing = B_TRUE;
1024 
1025 	/*
1026 	 * Unregister from the MAC service module. Since this can
1027 	 * fail if a client hasn't closed the MAC port, we gracefully
1028 	 * fail the operation.
1029 	 */
1030 	if ((err = mac_disable(grp->lg_mh)) != 0) {
1031 		grp->lg_closing = B_FALSE;
1032 		rw_exit(&grp->lg_lock);
1033 		AGGR_LACP_UNLOCK(grp);
1034 		rw_exit(&aggr_grp_lock);
1035 		return (err);
1036 	}
1037 
1038 	/* detach and free MAC ports associated with group */
1039 	port = grp->lg_ports;
1040 	while (port != NULL) {
1041 		cport = port->lp_next;
1042 		rw_enter(&port->lp_lock, RW_WRITER);
1043 		if (grp->lg_started)
1044 			aggr_port_stop(port);
1045 		(void) aggr_grp_detach_port(grp, port);
1046 		rw_exit(&port->lp_lock);
1047 		aggr_port_delete(port);
1048 		port = cport;
1049 	}
1050 
1051 	VERIFY(mac_unregister(grp->lg_mh) == 0);
1052 
1053 	rw_exit(&grp->lg_lock);
1054 	AGGR_LACP_UNLOCK(grp);
1055 
1056 	(void) mod_hash_remove(aggr_grp_hash, GRP_HASH_KEY(key), &val);
1057 	ASSERT(grp == (aggr_grp_t *)val);
1058 
1059 	ASSERT(aggr_grp_cnt > 0);
1060 	aggr_grp_cnt--;
1061 
1062 	rw_exit(&aggr_grp_lock);
1063 	AGGR_GRP_REFRELE(grp);
1064 
1065 	return (0);
1066 }
1067 
1068 void
1069 aggr_grp_free(aggr_grp_t *grp)
1070 {
1071 	ASSERT(grp->lg_refs == 0);
1072 	kmem_cache_free(aggr_grp_cache, grp);
1073 }
1074 
1075 /*
1076  * Walker invoked when building the list of configured groups and
1077  * their ports that must be passed up to user-space.
1078  */
1079 
1080 /*ARGSUSED*/
1081 static uint_t
1082 aggr_grp_info_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
1083 {
1084 	aggr_grp_t *grp;
1085 	aggr_port_t *port;
1086 	aggr_grp_info_state_t *state = arg;
1087 
1088 	if (state->ls_rc != 0)
1089 		return (MH_WALK_TERMINATE);	/* terminate walk */
1090 
1091 	grp = (aggr_grp_t *)val;
1092 
1093 	rw_enter(&grp->lg_lock, RW_READER);
1094 
1095 	if (state->ls_group_key != 0 && grp->lg_key != state->ls_group_key)
1096 		goto bail;
1097 
1098 	state->ls_group_found = B_TRUE;
1099 
1100 	state->ls_rc = state->ls_new_grp_fn(state->ls_fn_arg, grp->lg_key,
1101 	    grp->lg_addr, grp->lg_addr_fixed, grp->lg_tx_policy,
1102 	    grp->lg_nports, grp->lg_lacp_mode, grp->aggr.PeriodicTimer);
1103 
1104 	if (state->ls_rc != 0)
1105 		goto bail;
1106 
1107 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1108 
1109 		rw_enter(&port->lp_lock, RW_READER);
1110 
1111 		state->ls_rc = state->ls_new_port_fn(state->ls_fn_arg,
1112 		    port->lp_devname, port->lp_addr, port->lp_state,
1113 		    &port->lp_lacp.ActorOperPortState);
1114 
1115 		rw_exit(&port->lp_lock);
1116 
1117 		if (state->ls_rc != 0)
1118 			goto bail;
1119 	}
1120 
1121 bail:
1122 	rw_exit(&grp->lg_lock);
1123 	return ((state->ls_rc == 0) ? MH_WALK_CONTINUE : MH_WALK_TERMINATE);
1124 }
1125 
1126 int
1127 aggr_grp_info(uint_t *ngroups, uint32_t group_key, void *fn_arg,
1128     aggr_grp_info_new_grp_fn_t new_grp_fn,
1129     aggr_grp_info_new_port_fn_t new_port_fn)
1130 {
1131 	aggr_grp_info_state_t state;
1132 	int rc = 0;
1133 
1134 	rw_enter(&aggr_grp_lock, RW_READER);
1135 
1136 	*ngroups = aggr_grp_cnt;
1137 
1138 	bzero(&state, sizeof (state));
1139 	state.ls_group_key = group_key;
1140 	state.ls_new_grp_fn = new_grp_fn;
1141 	state.ls_new_port_fn = new_port_fn;
1142 	state.ls_fn_arg = fn_arg;
1143 
1144 	mod_hash_walk(aggr_grp_hash, aggr_grp_info_walker, &state);
1145 
1146 	if ((rc = state.ls_rc) == 0 && group_key != 0 &&
1147 	    !state.ls_group_found)
1148 		rc = ENOENT;
1149 
1150 	rw_exit(&aggr_grp_lock);
1151 	return (rc);
1152 }
1153 
1154 static void
1155 aggr_m_resources(void *arg)
1156 {
1157 	aggr_grp_t *grp = arg;
1158 	aggr_port_t *port;
1159 
1160 	/* Call each port's m_resources function */
1161 	for (port = grp->lg_ports; port != NULL; port = port->lp_next)
1162 		mac_resources(port->lp_mh);
1163 }
1164 
1165 /*ARGSUSED*/
1166 static void
1167 aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
1168 {
1169 	miocnak(q, mp, 0, ENOTSUP);
1170 }
1171 
1172 static int
1173 aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val)
1174 {
1175 	aggr_port_t	*port;
1176 	uint_t		stat_index;
1177 
1178 	/* We only aggregate counter statistics. */
1179 	if (IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat) ||
1180 	    IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat)) {
1181 		return (ENOTSUP);
1182 	}
1183 
1184 	/*
1185 	 * Counter statistics for a group are computed by aggregating the
1186 	 * counters of the members MACs while they were aggregated, plus
1187 	 * the residual counter of the group itself, which is updated each
1188 	 * time a MAC is removed from the group.
1189 	 */
1190 	*val = 0;
1191 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1192 		/* actual port statistic */
1193 		*val += aggr_port_stat(port, stat);
1194 		/*
1195 		 * minus the port stat when it was added, plus any residual
1196 		 * ammount for the group.
1197 		 */
1198 		if (IS_MAC_STAT(stat)) {
1199 			stat_index = stat - MAC_STAT_MIN;
1200 			*val -= port->lp_stat[stat_index];
1201 			*val += grp->lg_stat[stat_index];
1202 		} else if (IS_MACTYPE_STAT(stat)) {
1203 			stat_index = stat - MACTYPE_STAT_MIN;
1204 			*val -= port->lp_ether_stat[stat_index];
1205 			*val += grp->lg_ether_stat[stat_index];
1206 		}
1207 	}
1208 	return (0);
1209 }
1210 
1211 static int
1212 aggr_m_stat(void *arg, uint_t stat, uint64_t *val)
1213 {
1214 	aggr_grp_t	*grp = arg;
1215 	int		rval = 0;
1216 
1217 	rw_enter(&grp->lg_lock, RW_READER);
1218 
1219 	switch (stat) {
1220 	case MAC_STAT_IFSPEED:
1221 		*val = grp->lg_ifspeed;
1222 		break;
1223 
1224 	case ETHER_STAT_LINK_DUPLEX:
1225 		*val = grp->lg_link_duplex;
1226 		break;
1227 
1228 	default:
1229 		/*
1230 		 * For all other statistics, we return the aggregated stat
1231 		 * from the underlying ports.  aggr_grp_stat() will set
1232 		 * rval appropriately if the statistic isn't a counter.
1233 		 */
1234 		rval = aggr_grp_stat(grp, stat, val);
1235 	}
1236 
1237 	rw_exit(&grp->lg_lock);
1238 	return (rval);
1239 }
1240 
1241 static int
1242 aggr_m_start(void *arg)
1243 {
1244 	aggr_grp_t *grp = arg;
1245 	aggr_port_t *port;
1246 
1247 	AGGR_LACP_LOCK(grp);
1248 	rw_enter(&grp->lg_lock, RW_WRITER);
1249 
1250 	/*
1251 	 * Attempts to start all configured members of the group.
1252 	 * Group members will be attached when their link-up notification
1253 	 * is received.
1254 	 */
1255 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1256 		rw_enter(&port->lp_lock, RW_WRITER);
1257 		if (aggr_port_start(port) != 0) {
1258 			rw_exit(&port->lp_lock);
1259 			continue;
1260 		}
1261 
1262 		/* set port promiscuous mode */
1263 		if (aggr_port_promisc(port, grp->lg_promisc) != 0)
1264 			aggr_port_stop(port);
1265 		rw_exit(&port->lp_lock);
1266 	}
1267 
1268 	grp->lg_started = B_TRUE;
1269 
1270 	rw_exit(&grp->lg_lock);
1271 	AGGR_LACP_UNLOCK(grp);
1272 
1273 	return (0);
1274 }
1275 
1276 static void
1277 aggr_m_stop(void *arg)
1278 {
1279 	aggr_grp_t *grp = arg;
1280 	aggr_port_t *port;
1281 
1282 	rw_enter(&grp->lg_lock, RW_WRITER);
1283 
1284 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1285 		rw_enter(&port->lp_lock, RW_WRITER);
1286 		aggr_port_stop(port);
1287 		rw_exit(&port->lp_lock);
1288 	}
1289 
1290 	grp->lg_started = B_FALSE;
1291 
1292 	rw_exit(&grp->lg_lock);
1293 }
1294 
1295 static int
1296 aggr_m_promisc(void *arg, boolean_t on)
1297 {
1298 	aggr_grp_t *grp = arg;
1299 	aggr_port_t *port;
1300 	boolean_t link_state_changed = B_FALSE;
1301 
1302 	AGGR_LACP_LOCK(grp);
1303 	rw_enter(&grp->lg_lock, RW_WRITER);
1304 	AGGR_GRP_REFHOLD(grp);
1305 
1306 	ASSERT(!grp->lg_closing);
1307 
1308 	if (on == grp->lg_promisc)
1309 		goto bail;
1310 
1311 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1312 		rw_enter(&port->lp_lock, RW_WRITER);
1313 		AGGR_PORT_REFHOLD(port);
1314 		if (port->lp_started) {
1315 			if (aggr_port_promisc(port, on) != 0) {
1316 				if (aggr_grp_detach_port(grp, port))
1317 					link_state_changed = B_TRUE;
1318 			} else {
1319 				/*
1320 				 * If a port was detached because of a previous
1321 				 * failure changing the promiscuity, the port
1322 				 * is reattached when it successfully changes
1323 				 * the promiscuity now, and this might cause
1324 				 * the link state of the aggregation to change.
1325 				 */
1326 				if (aggr_grp_attach_port(grp, port))
1327 					link_state_changed = B_TRUE;
1328 			}
1329 		}
1330 		rw_exit(&port->lp_lock);
1331 		AGGR_PORT_REFRELE(port);
1332 	}
1333 
1334 	grp->lg_promisc = on;
1335 
1336 	if (link_state_changed)
1337 		mac_link_update(grp->lg_mh, grp->lg_link_state);
1338 
1339 bail:
1340 	rw_exit(&grp->lg_lock);
1341 	AGGR_LACP_UNLOCK(grp);
1342 	AGGR_GRP_REFRELE(grp);
1343 
1344 	return (0);
1345 }
1346 
1347 /*
1348  * Initialize the capabilities that are advertised for the group
1349  * according to the capabilities of the constituent ports.
1350  */
1351 static boolean_t
1352 aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
1353 {
1354 	aggr_grp_t *grp = arg;
1355 
1356 	switch (cap) {
1357 	case MAC_CAPAB_HCKSUM: {
1358 		uint32_t *hcksum_txflags = cap_data;
1359 		*hcksum_txflags = grp->lg_hcksum_txflags;
1360 		break;
1361 	}
1362 	case MAC_CAPAB_POLL:
1363 		/*
1364 		 * There's nothing for us to fill in, we simply return
1365 		 * B_TRUE or B_FALSE to represent the group's support
1366 		 * status for this capability.
1367 		 */
1368 		return (grp->lg_gldv3_polling);
1369 	default:
1370 		return (B_FALSE);
1371 	}
1372 	return (B_TRUE);
1373 }
1374 
1375 /*
1376  * Add or remove the multicast addresses that are defined for the group
1377  * to or from the specified port.
1378  * This function is called before stopping a port, before a port
1379  * is detached from a group, and when attaching a port to a group.
1380  */
1381 void
1382 aggr_grp_multicst_port(aggr_port_t *port, boolean_t add)
1383 {
1384 	aggr_grp_t *grp = port->lp_grp;
1385 
1386 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
1387 	ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock));
1388 
1389 	if (!port->lp_started)
1390 		return;
1391 
1392 	mac_multicst_refresh(grp->lg_mh, aggr_port_multicst, port, add);
1393 }
1394 
1395 static int
1396 aggr_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
1397 {
1398 	aggr_grp_t *grp = arg;
1399 	aggr_port_t *port = NULL;
1400 	int err = 0, cerr;
1401 
1402 	rw_enter(&grp->lg_lock, RW_WRITER);
1403 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1404 		if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
1405 			continue;
1406 		cerr = aggr_port_multicst(port, add, addrp);
1407 		if (cerr != 0 && err == 0)
1408 			err = cerr;
1409 	}
1410 	rw_exit(&grp->lg_lock);
1411 	return (err);
1412 }
1413 
1414 static int
1415 aggr_m_unicst(void *arg, const uint8_t *macaddr)
1416 {
1417 	aggr_grp_t *grp = arg;
1418 	int rc;
1419 
1420 	AGGR_LACP_LOCK(grp);
1421 	rw_enter(&grp->lg_lock, RW_WRITER);
1422 	rc = aggr_grp_modify(0, grp, AGGR_MODIFY_MAC, 0, B_TRUE, macaddr,
1423 	    0, 0);
1424 	rw_exit(&grp->lg_lock);
1425 	AGGR_LACP_UNLOCK(grp);
1426 
1427 	return (rc);
1428 }
1429 
1430 /*
1431  * Initialize the capabilities that are advertised for the group
1432  * according to the capabilities of the constituent ports.
1433  */
1434 static void
1435 aggr_grp_capab_set(aggr_grp_t *grp)
1436 {
1437 	uint32_t cksum;
1438 	aggr_port_t *port;
1439 
1440 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
1441 	ASSERT(grp->lg_ports != NULL);
1442 
1443 	grp->lg_hcksum_txflags = (uint32_t)-1;
1444 	grp->lg_gldv3_polling = B_TRUE;
1445 
1446 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1447 		if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &cksum))
1448 			cksum = 0;
1449 		grp->lg_hcksum_txflags &= cksum;
1450 
1451 		grp->lg_gldv3_polling &=
1452 		    mac_capab_get(port->lp_mh, MAC_CAPAB_POLL, NULL);
1453 	}
1454 }
1455 
1456 
1457 /*
1458  * Checks whether the capabilities of the port being added are compatible
1459  * with the current capabilities of the aggregation.
1460  */
1461 static boolean_t
1462 aggr_grp_capab_check(aggr_grp_t *grp, aggr_port_t *port)
1463 {
1464 	uint32_t	hcksum_txflags;
1465 
1466 	ASSERT(grp->lg_ports != NULL);
1467 
1468 	if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &hcksum_txflags)) {
1469 		if (grp->lg_hcksum_txflags != 0)
1470 			return (B_FALSE);
1471 	} else if ((hcksum_txflags & grp->lg_hcksum_txflags) !=
1472 	    grp->lg_hcksum_txflags) {
1473 		return (B_FALSE);
1474 	}
1475 
1476 	if (mac_capab_get(port->lp_mh, MAC_CAPAB_POLL, NULL) !=
1477 	    grp->lg_gldv3_polling) {
1478 		return (B_FALSE);
1479 	}
1480 
1481 	return (B_TRUE);
1482 }
1483 
1484 /*
1485  * Returns the maximum SDU according to the SDU of the constituent ports.
1486  */
1487 static uint_t
1488 aggr_grp_max_sdu(aggr_grp_t *grp)
1489 {
1490 	uint_t max_sdu = (uint_t)-1;
1491 	aggr_port_t *port;
1492 
1493 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
1494 	ASSERT(grp->lg_ports != NULL);
1495 
1496 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1497 		const mac_info_t *port_mi = mac_info(port->lp_mh);
1498 		if (max_sdu > port_mi->mi_sdu_max)
1499 			max_sdu = port_mi->mi_sdu_max;
1500 	}
1501 
1502 	return (max_sdu);
1503 }
1504 
1505 /*
1506  * Checks if the maximum SDU of the specified port is compatible
1507  * with the maximum SDU of the specified aggregation group, returns
1508  * B_TRUE if it is, B_FALSE otherwise.
1509  */
1510 static boolean_t
1511 aggr_grp_sdu_check(aggr_grp_t *grp, aggr_port_t *port)
1512 {
1513 	const mac_info_t *port_mi = mac_info(port->lp_mh);
1514 
1515 	return (port_mi->mi_sdu_max >= grp->lg_max_sdu);
1516 }
1517