xref: /titanic_51/usr/src/uts/common/io/aggr/aggr_grp.c (revision d48713b83f032afcef6785303e68f293eacd5671)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups.
30  *
31  * An instance of the structure aggr_grp_t is allocated for each
32  * link aggregation group. When created, aggr_grp_t objects are
33  * entered into the aggr_grp_hash hash table maintained by the modhash
34  * module. The hash key is the port number associated with the link
 * aggregation group. The port number associated with a group corresponds
 * to the key associated with the group.
37  *
 * A set of MAC ports is associated with each link aggregation group.
39  */
40 
41 #include <sys/types.h>
42 #include <sys/sysmacros.h>
43 #include <sys/conf.h>
44 #include <sys/cmn_err.h>
45 #include <sys/list.h>
46 #include <sys/ksynch.h>
47 #include <sys/kmem.h>
48 #include <sys/stream.h>
49 #include <sys/modctl.h>
50 #include <sys/ddi.h>
51 #include <sys/sunddi.h>
52 #include <sys/atomic.h>
53 #include <sys/stat.h>
54 #include <sys/modhash.h>
55 #include <sys/strsun.h>
56 #include <sys/dlpi.h>
57 
58 #include <sys/aggr.h>
59 #include <sys/aggr_impl.h>
60 
/*
 * MAC driver entry points. They are declared static and are therefore
 * defined later in this file; aggr_grp_create() installs most of them
 * in the mac_t of each new group.
 */
static void aggr_m_info(void *, mac_info_t *);
static int aggr_m_start(void *);
static void aggr_m_stop(void *);
static int aggr_m_promisc(void *, boolean_t);
static int aggr_m_multicst(void *, boolean_t, const uint8_t *);
static int aggr_m_unicst(void *, const uint8_t *);
static uint64_t aggr_m_stat(void *, enum mac_stat);
static void aggr_m_resources(void *);
static void aggr_m_ioctl(void *, queue_t *, mblk_t *);

/*
 * Forward declarations of helpers private to this file.
 */
static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, const char *, uint32_t);
static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *);
static void aggr_stats_op(enum mac_stat, uint64_t *, uint64_t *, boolean_t);
static void aggr_grp_capab_set(aggr_grp_t *);
static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *);
76 
/*
 * Module-wide state:
 *   aggr_grp_cache  kmem cache from which aggr_grp_t objects are allocated.
 *   aggr_grp_hash   hash table of the existing groups, indexed by group key.
 *   aggr_grp_lock   held as reader for hash lookups and walks, and as
 *                   writer when groups are created or deleted.
 *   aggr_grp_cnt    number of groups currently entered in aggr_grp_hash.
 */
static kmem_cache_t	*aggr_grp_cache;
static mod_hash_t	*aggr_grp_hash;
static krwlock_t	aggr_grp_lock;
static uint_t		aggr_grp_cnt;
81 
/* number of buckets requested when creating aggr_grp_hash */
#define	GRP_HASHSZ		64
/*
 * Convert a group key into the key type expected by the modhash routines.
 * The argument is parenthesized so that an expression argument is
 * evaluated as a whole before the casts are applied.
 */
#define	GRP_HASH_KEY(key)	((mod_hash_key_t)(uintptr_t)(key))
84 
/* all-zero address; a fixed group MAC address equal to it is rejected */
static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0};
/* broadcast address copied into the mac_info_t of each new group */
static uchar_t aggr_brdcst_mac[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
87 
/*
 * State shared between aggr_grp_info() and aggr_grp_info_walker() while
 * the group hash table is being walked.
 */
typedef struct aggr_grp_info_state {
	/* key of the only group to report, or 0 to report every group */
	uint32_t	ls_group_key;
	/* set to B_TRUE by the walker once a group has been reported */
	boolean_t	ls_group_found;
	/* callback invoked once per reported group */
	aggr_grp_info_new_grp_fn_t ls_new_grp_fn;
	/* callback invoked once per port of a reported group */
	aggr_grp_info_new_port_fn_t ls_new_port_fn;
	/* opaque argument passed as first argument to both callbacks */
	void		*ls_fn_arg;
	/* last callback return value; non-zero terminates the walk */
	int		ls_rc;
} aggr_grp_info_state_t;
97 
98 /*ARGSUSED*/
99 static int
100 aggr_grp_constructor(void *buf, void *arg, int kmflag)
101 {
102 	aggr_grp_t *grp = buf;
103 
104 	bzero(grp, sizeof (*grp));
105 	rw_init(&grp->lg_lock, NULL, RW_DRIVER, NULL);
106 	mutex_init(&grp->aggr.gl_lock, NULL, MUTEX_DEFAULT, NULL);
107 
108 	grp->lg_link_state = LINK_STATE_UNKNOWN;
109 
110 	return (0);
111 }
112 
113 /*ARGSUSED*/
114 static void
115 aggr_grp_destructor(void *buf, void *arg)
116 {
117 	aggr_grp_t *grp = buf;
118 
119 	if (grp->lg_tx_ports != NULL) {
120 		kmem_free(grp->lg_tx_ports,
121 		    grp->lg_tx_ports_size * sizeof (aggr_port_t *));
122 	}
123 
124 	mutex_destroy(&grp->aggr.gl_lock);
125 	rw_destroy(&grp->lg_lock);
126 }
127 
128 void
129 aggr_grp_init(void)
130 {
131 	aggr_grp_cache = kmem_cache_create("aggr_grp_cache",
132 	    sizeof (aggr_grp_t), 0, aggr_grp_constructor,
133 	    aggr_grp_destructor, NULL, NULL, NULL, 0);
134 
135 	aggr_grp_hash = mod_hash_create_idhash("aggr_grp_hash",
136 	    GRP_HASHSZ, mod_hash_null_valdtor);
137 	rw_init(&aggr_grp_lock, NULL, RW_DEFAULT, NULL);
138 	aggr_grp_cnt = 0;
139 }
140 
141 void
142 aggr_grp_fini(void)
143 {
144 	rw_destroy(&aggr_grp_lock);
145 	mod_hash_destroy_idhash(aggr_grp_hash);
146 	kmem_cache_destroy(aggr_grp_cache);
147 }
148 
149 uint_t
150 aggr_grp_count(void)
151 {
152 	uint_t	count;
153 
154 	rw_enter(&aggr_grp_lock, RW_READER);
155 	count = aggr_grp_cnt;
156 	rw_exit(&aggr_grp_lock);
157 	return (count);
158 }
159 
160 /*
161  * Attach a port to a link aggregation group.
162  *
163  * A port is attached to a link aggregation group once its speed
164  * and link state have been verified.
165  *
166  * Returns B_TRUE if the group link state or speed has changed. If
167  * it's the case, the caller must notify the MAC layer via a call
168  * to mac_link().
169  */
170 boolean_t
171 aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port)
172 {
173 	boolean_t link_changed = B_FALSE;
174 
175 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
176 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
177 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
178 
179 	if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
180 		return (B_FALSE);
181 
182 	/*
183 	 * Validate the MAC port link speed and update the group
184 	 * link speed if needed.
185 	 */
186 	if (port->lp_ifspeed == 0 ||
187 	    port->lp_link_state != LINK_STATE_UP ||
188 	    port->lp_link_duplex != LINK_DUPLEX_FULL) {
189 		/*
190 		 * Can't attach a MAC port with unknown link speed,
191 		 * down link, or not in full duplex mode.
192 		 */
193 		return (B_FALSE);
194 	}
195 
196 	if (grp->lg_ifspeed == 0) {
197 		/*
198 		 * The group inherits the speed of the first link being
199 		 * attached.
200 		 */
201 		grp->lg_ifspeed = port->lp_ifspeed;
202 		link_changed = B_TRUE;
203 	} else if (grp->lg_ifspeed != port->lp_ifspeed) {
204 		/*
205 		 * The link speed of the MAC port must be the same as
206 		 * the group link speed, as per 802.3ad. Since it is
207 		 * not, the attach is cancelled.
208 		 */
209 		return (B_FALSE);
210 	}
211 
212 	grp->lg_nattached_ports++;
213 
214 	/*
215 	 * Update the group link state.
216 	 */
217 	if (grp->lg_link_state != LINK_STATE_UP) {
218 		grp->lg_link_state = LINK_STATE_UP;
219 		grp->lg_link_duplex = LINK_DUPLEX_FULL;
220 		link_changed = B_TRUE;
221 	}
222 
223 	aggr_grp_multicst_port(port, B_TRUE);
224 
225 	/*
226 	 * Update port's state.
227 	 */
228 	port->lp_state = AGGR_PORT_STATE_ATTACHED;
229 
230 	/*
231 	 * If LACP is OFF, the port can be used to send data as soon
232 	 * as its link is up and verified to be compatible with the
233 	 * aggregation.
234 	 *
235 	 * If LACP is active or passive, notify the LACP subsystem, which
236 	 * will enable sending on the port following the LACP protocol.
237 	 */
238 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
239 		aggr_send_port_enable(port);
240 	else
241 		aggr_lacp_port_attached(port);
242 
243 	return (link_changed);
244 }
245 
246 boolean_t
247 aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port)
248 {
249 	boolean_t link_changed = B_FALSE;
250 
251 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
252 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
253 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
254 
255 	/* update state */
256 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
257 		return (B_FALSE);
258 	port->lp_state = AGGR_PORT_STATE_STANDBY;
259 
260 	aggr_grp_multicst_port(port, B_FALSE);
261 
262 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
263 		aggr_send_port_disable(port);
264 	else
265 		aggr_lacp_port_detached(port);
266 
267 	grp->lg_nattached_ports--;
268 	if (grp->lg_nattached_ports == 0) {
269 		/* the last attached MAC port of the group is being detached */
270 		grp->lg_ifspeed = 0;
271 		grp->lg_link_state = LINK_STATE_DOWN;
272 		grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
273 		link_changed = B_TRUE;
274 	}
275 
276 	return (link_changed);
277 }
278 
279 /*
280  * Update the MAC addresses of the constituent ports of the specified
281  * group. This function is invoked:
282  * - after creating a new aggregation group.
283  * - after adding new ports to an aggregation group.
284  * - after removing a port from a group when the MAC address of
285  *   that port was used for the MAC address of the group.
286  * - after the MAC address of a port changed when the MAC address
287  *   of that port was used for the MAC address of the group.
288  */
289 void
290 aggr_grp_update_ports_mac(aggr_grp_t *grp)
291 {
292 	aggr_port_t *cport;
293 
294 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
295 
296 	for (cport = grp->lg_ports; cport != NULL;
297 	    cport = cport->lp_next) {
298 		rw_enter(&cport->lp_lock, RW_WRITER);
299 		if (aggr_port_unicst(cport, grp->lg_addr) != 0)
300 			(void) aggr_grp_detach_port(grp, cport);
301 		rw_exit(&cport->lp_lock);
302 		if (grp->lg_closing)
303 			break;
304 	}
305 }
306 
307 /*
308  * Invoked when the MAC address of a port has changed. If the port's
309  * MAC address was used for the group MAC address, returns B_TRUE.
310  * In that case, it is the responsibility of the caller to
311  * invoke aggr_grp_update_ports_mac() after releasing the
312  * the port lock, and aggr_grp_notify() after releasing the
313  * group lock.
314  */
315 boolean_t
316 aggr_grp_port_mac_changed(aggr_grp_t *grp, aggr_port_t *port)
317 {
318 	boolean_t grp_addr_changed = B_FALSE;
319 
320 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
321 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
322 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
323 
324 	if (grp->lg_addr_fixed) {
325 		/*
326 		 * The group is using a fixed MAC address or an automatic
327 		 * MAC address has not been set.
328 		 */
329 		return (B_FALSE);
330 	}
331 
332 	if (grp->lg_mac_addr_port == port) {
333 		/*
334 		 * The MAC address of the port was assigned to the group
335 		 * MAC address. Update the group MAC address.
336 		 */
337 		bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
338 		grp_addr_changed = B_TRUE;
339 	} else {
340 		/*
341 		 * Update the actual port MAC address to the MAC address
342 		 * of the group.
343 		 */
344 		if (aggr_port_unicst(port, grp->lg_addr) != 0)
345 			(void) aggr_grp_detach_port(grp, port);
346 	}
347 
348 	return (grp_addr_changed);
349 }
350 
351 /*
352  * Add a port to a link aggregation group.
353  */
354 static int
355 aggr_grp_add_port(aggr_grp_t *grp, const char *name, uint_t portnum,
356     aggr_port_t **pp)
357 {
358 	aggr_port_t *port, **cport;
359 	int err;
360 
361 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
362 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
363 
364 	/* create new port */
365 	err = aggr_port_create(name, portnum, &port);
366 	if (err != 0)
367 		return (err);
368 
369 	rw_enter(&port->lp_lock, RW_WRITER);
370 
371 	/* add port to list of group constituent ports */
372 	cport = &grp->lg_ports;
373 	while (*cport != NULL)
374 		cport = &((*cport)->lp_next);
375 	*cport = port;
376 
377 	/*
378 	 * Back reference to the group it is member of. A port always
379 	 * holds a reference to its group to ensure that the back
380 	 * reference is always valid.
381 	 */
382 	port->lp_grp = grp;
383 	AGGR_GRP_REFHOLD(grp);
384 	grp->lg_nports++;
385 
386 	aggr_lacp_init_port(port);
387 
388 	rw_exit(&port->lp_lock);
389 
390 	if (pp != NULL)
391 		*pp = port;
392 
393 	return (0);
394 }
395 
396 /*
397  * Add one or more ports to an existing link aggregation group.
398  */
399 int
400 aggr_grp_add_ports(uint32_t key, uint_t nports, laioc_port_t *ports)
401 {
402 	int rc, i, nadded = 0;
403 	aggr_grp_t *grp = NULL;
404 	aggr_port_t *port;
405 
406 	/* get group corresponding to key */
407 	rw_enter(&aggr_grp_lock, RW_READER);
408 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key),
409 	    (mod_hash_val_t *)&grp) != 0) {
410 		rw_exit(&aggr_grp_lock);
411 		return (ENOENT);
412 	}
413 	AGGR_GRP_REFHOLD(grp);
414 	rw_exit(&aggr_grp_lock);
415 
416 	AGGR_LACP_LOCK(grp);
417 	rw_enter(&grp->lg_lock, RW_WRITER);
418 
419 	/* add the specified ports to group */
420 	for (i = 0; i < nports; i++) {
421 		/* add port to group */
422 		if ((rc = aggr_grp_add_port(grp, ports[i].lp_devname,
423 		    ports[i].lp_port, &port)) != 0)
424 			goto bail;
425 		ASSERT(port != NULL);
426 		nadded++;
427 
428 		/* check capabilities */
429 		if (!aggr_grp_capab_check(grp, port)) {
430 			rc = ENOTSUP;
431 			goto bail;
432 		}
433 
434 		/* start port if group has already been started */
435 		if (grp->lg_started) {
436 			rw_enter(&port->lp_lock, RW_WRITER);
437 			rc = aggr_port_start(port);
438 			if (rc != 0) {
439 				rw_exit(&port->lp_lock);
440 				goto bail;
441 			}
442 
443 			/* set port promiscuous mode */
444 			rc = aggr_port_promisc(port, grp->lg_promisc);
445 			if (rc != 0) {
446 				rw_exit(&port->lp_lock);
447 				goto bail;
448 			}
449 			rw_exit(&port->lp_lock);
450 		}
451 	}
452 
453 	/* update the MAC address of the constituent ports */
454 	aggr_grp_update_ports_mac(grp);
455 
456 bail:
457 	if (rc != 0) {
458 		/* stop and remove ports that have been added */
459 		for (i = 0; i < nadded && !grp->lg_closing; i++) {
460 			port = aggr_grp_port_lookup(grp, ports[i].lp_devname,
461 			    ports[i].lp_port);
462 			ASSERT(port != NULL);
463 			if (grp->lg_started) {
464 				rw_enter(&port->lp_lock, RW_WRITER);
465 				aggr_port_stop(port);
466 				rw_exit(&port->lp_lock);
467 			}
468 			(void) aggr_grp_rem_port(grp, port, NULL);
469 		}
470 	}
471 
472 	rw_exit(&grp->lg_lock);
473 	AGGR_LACP_UNLOCK(grp);
474 	if (rc == 0 && !grp->lg_closing)
475 		mac_resource_update(&grp->lg_mac);
476 	AGGR_GRP_REFRELE(grp);
477 	return (rc);
478 }
479 
480 /*
481  * Update properties of an existing link aggregation group.
482  */
483 int
484 aggr_grp_modify(uint32_t key, aggr_grp_t *grp_arg, uint8_t update_mask,
485     uint32_t policy, boolean_t mac_fixed, const uchar_t *mac_addr,
486     aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer)
487 {
488 	int rc = 0;
489 	aggr_grp_t *grp = NULL;
490 	boolean_t mac_addr_changed = B_FALSE;
491 
492 	if (grp_arg == NULL) {
493 		/* get group corresponding to key */
494 		rw_enter(&aggr_grp_lock, RW_READER);
495 		if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key),
496 		    (mod_hash_val_t *)&grp) != 0) {
497 			rc = ENOENT;
498 			goto bail;
499 		}
500 		AGGR_LACP_LOCK(grp);
501 		rw_enter(&grp->lg_lock, RW_WRITER);
502 	} else {
503 		grp = grp_arg;
504 		ASSERT(AGGR_LACP_LOCK_HELD(grp));
505 		ASSERT(RW_WRITE_HELD(&grp->lg_lock));
506 	}
507 
508 	ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock));
509 	AGGR_GRP_REFHOLD(grp);
510 
511 	/* validate fixed address if specified */
512 	if ((update_mask & AGGR_MODIFY_MAC) && mac_fixed &&
513 	    ((bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) ||
514 	    (mac_addr[0] & 0x01))) {
515 		rc = EINVAL;
516 		goto bail;
517 	}
518 
519 	/* update policy if requested */
520 	if (update_mask & AGGR_MODIFY_POLICY)
521 		aggr_send_update_policy(grp, policy);
522 
523 	/* update unicast MAC address if requested */
524 	if (update_mask & AGGR_MODIFY_MAC) {
525 		if (mac_fixed) {
526 			/* user-supplied MAC address */
527 			grp->lg_mac_addr_port = NULL;
528 			if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) != 0) {
529 				bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
530 				mac_addr_changed = B_TRUE;
531 			}
532 		} else if (grp->lg_addr_fixed) {
533 			/* switch from user-supplied to automatic */
534 			aggr_port_t *port = grp->lg_ports;
535 
536 			rw_enter(&port->lp_lock, RW_WRITER);
537 			bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
538 			grp->lg_mac_addr_port = port;
539 			mac_addr_changed = B_TRUE;
540 			rw_exit(&port->lp_lock);
541 		}
542 		grp->lg_addr_fixed = mac_fixed;
543 	}
544 
545 	if (mac_addr_changed)
546 		aggr_grp_update_ports_mac(grp);
547 
548 	if (update_mask & AGGR_MODIFY_LACP_MODE)
549 		aggr_lacp_update_mode(grp, lacp_mode);
550 
551 	if ((update_mask & AGGR_MODIFY_LACP_TIMER) && !grp->lg_closing)
552 		aggr_lacp_update_timer(grp, lacp_timer);
553 
554 bail:
555 	if (grp_arg == NULL) {
556 		if (grp != NULL) {
557 			rw_exit(&grp->lg_lock);
558 			AGGR_LACP_UNLOCK(grp);
559 		}
560 		rw_exit(&aggr_grp_lock);
561 		/* pass new unicast address up to MAC layer */
562 		if (grp != NULL && mac_addr_changed && !grp->lg_closing)
563 			mac_unicst_update(&grp->lg_mac, grp->lg_addr);
564 	}
565 
566 	if (grp != NULL)
567 		AGGR_GRP_REFRELE(grp);
568 
569 	return (rc);
570 }
571 
572 /*
573  * Create a new link aggregation group upon request from administrator.
574  * Returns 0 on success, an errno on failure.
575  */
576 int
577 aggr_grp_create(uint32_t key, uint_t nports, laioc_port_t *ports,
578     uint32_t policy, boolean_t mac_fixed, uchar_t *mac_addr,
579     aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer)
580 {
581 	aggr_grp_t *grp = NULL;
582 	aggr_port_t *port;
583 	mac_t *mac;
584 	mac_info_t *mip;
585 	int err;
586 	int i;
587 
588 	/* need at least one port */
589 	if (nports == 0)
590 		return (EINVAL);
591 
592 	rw_enter(&aggr_grp_lock, RW_WRITER);
593 
594 	/* does a group with the same key already exist? */
595 	err = mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key),
596 	    (mod_hash_val_t *)&grp);
597 	if (err == 0) {
598 		rw_exit(&aggr_grp_lock);
599 		return (EEXIST);
600 	}
601 
602 	grp = kmem_cache_alloc(aggr_grp_cache, KM_SLEEP);
603 
604 	AGGR_LACP_LOCK(grp);
605 	rw_enter(&grp->lg_lock, RW_WRITER);
606 
607 	grp->lg_refs = 1;
608 	grp->lg_closing = B_FALSE;
609 	grp->lg_key = key;
610 
611 	grp->lg_ifspeed = 0;
612 	grp->lg_link_state = LINK_STATE_UNKNOWN;
613 	grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
614 	grp->lg_started = B_FALSE;
615 	grp->lg_promisc = B_FALSE;
616 	aggr_lacp_init_grp(grp);
617 
618 	/* add MAC ports to group */
619 	grp->lg_ports = NULL;
620 	grp->lg_nports = 0;
621 	grp->lg_nattached_ports = 0;
622 	grp->lg_ntx_ports = 0;
623 
624 	for (i = 0; i < nports; i++) {
625 		err = aggr_grp_add_port(grp, ports[i].lp_devname,
626 		    ports[i].lp_port, NULL);
627 		if (err != 0)
628 			goto bail;
629 	}
630 
631 	/*
632 	 * If no explicit MAC address was specified by the administrator,
633 	 * set it to the MAC address of the first port.
634 	 */
635 	grp->lg_addr_fixed = mac_fixed;
636 	if (grp->lg_addr_fixed) {
637 		/* validate specified address */
638 		if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) {
639 			err = EINVAL;
640 			goto bail;
641 		}
642 		bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
643 	} else {
644 		bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
645 		grp->lg_mac_addr_port = grp->lg_ports;
646 	}
647 
648 	/* update the MAC address of the constituent ports */
649 	aggr_grp_update_ports_mac(grp);
650 
651 	/* update outbound load balancing policy */
652 	aggr_send_update_policy(grp, policy);
653 
654 	/* register with the MAC module */
655 	mac = &grp->lg_mac;
656 	bzero(mac, sizeof (*mac));
657 
658 	mac->m_ident = MAC_IDENT;
659 
660 	mac->m_driver = grp;
661 	mac->m_dip = aggr_dip;
662 	mac->m_port = key;
663 
664 	mip = &(mac->m_info);
665 	mip->mi_media = DL_ETHER;
666 	mip->mi_sdu_min = 0;
667 	mip->mi_sdu_max = ETHERMTU;
668 
669 	MAC_STAT_MIB(mip->mi_stat);
670 	MAC_STAT_ETHER(mip->mi_stat);
671 	mip->mi_stat[MAC_STAT_LINK_DUPLEX] = B_TRUE;
672 
673 	mip->mi_addr_length = ETHERADDRL;
674 	bcopy(aggr_brdcst_mac, mip->mi_brdcst_addr, ETHERADDRL);
675 	bcopy(grp->lg_addr, mip->mi_unicst_addr, ETHERADDRL);
676 
677 	mac->m_stat = aggr_m_stat;
678 	mac->m_start = aggr_m_start;
679 	mac->m_stop = aggr_m_stop;
680 	mac->m_promisc = aggr_m_promisc;
681 	mac->m_multicst = aggr_m_multicst;
682 	mac->m_unicst = aggr_m_unicst;
683 	mac->m_tx = aggr_m_tx;
684 	mac->m_resources = aggr_m_resources;
685 	mac->m_ioctl = aggr_m_ioctl;
686 
687 	/* set the initial group capabilities */
688 	aggr_grp_capab_set(grp);
689 
690 	if ((err = mac_register(mac)) != 0)
691 		goto bail;
692 
693 	/* set LACP mode */
694 	aggr_lacp_set_mode(grp, lacp_mode, lacp_timer);
695 
696 	/* add new group to hash table */
697 	err = mod_hash_insert(aggr_grp_hash, GRP_HASH_KEY(key),
698 	    (mod_hash_val_t)grp);
699 	ASSERT(err == 0);
700 	aggr_grp_cnt++;
701 
702 	rw_exit(&grp->lg_lock);
703 	AGGR_LACP_UNLOCK(grp);
704 	rw_exit(&aggr_grp_lock);
705 	return (0);
706 
707 bail:
708 	if (grp != NULL) {
709 		aggr_port_t *cport;
710 
711 		port = grp->lg_ports;
712 		while (port != NULL) {
713 			cport = port->lp_next;
714 			aggr_port_delete(port);
715 			port = cport;
716 		}
717 
718 		rw_exit(&grp->lg_lock);
719 		AGGR_LACP_UNLOCK(grp);
720 
721 		kmem_cache_free(aggr_grp_cache, grp);
722 	}
723 
724 	rw_exit(&aggr_grp_lock);
725 	return (err);
726 }
727 
728 /*
729  * Return a pointer to the member of a group with specified device name
730  * and port number.
731  */
732 static aggr_port_t *
733 aggr_grp_port_lookup(aggr_grp_t *grp, const char *devname, uint32_t portnum)
734 {
735 	aggr_port_t *port;
736 
737 	ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock));
738 
739 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
740 		if ((strcmp(port->lp_devname, devname) == 0) &&
741 		    (port->lp_port == portnum))
742 			break;
743 	}
744 
745 	return (port);
746 }
747 
748 /*
749  * Stop, detach and remove a port from a link aggregation group.
750  */
751 static int
752 aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port, boolean_t *do_notify)
753 {
754 	aggr_port_t **pport;
755 	boolean_t grp_mac_addr_changed = B_FALSE;
756 	uint64_t val;
757 	uint_t i;
758 
759 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
760 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
761 	ASSERT(grp->lg_nports > 1);
762 
763 	if (do_notify != NULL)
764 		*do_notify = B_FALSE;
765 
766 	/* unlink port */
767 	for (pport = &grp->lg_ports; *pport != port;
768 	    pport = &(*pport)->lp_next) {
769 		if (*pport == NULL)
770 			return (ENOENT);
771 	}
772 	*pport = port->lp_next;
773 
774 	rw_enter(&port->lp_lock, RW_WRITER);
775 	port->lp_closing = B_TRUE;
776 
777 	/*
778 	 * If the MAC address of the port being removed was assigned
779 	 * to the group, update the group MAC address
780 	 * using the MAC address of a different port.
781 	 */
782 	if (!grp->lg_addr_fixed && grp->lg_mac_addr_port == port) {
783 		/*
784 		 * Set the MAC address of the group to the
785 		 * MAC address of its first port.
786 		 */
787 		bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
788 		grp->lg_mac_addr_port = grp->lg_ports;
789 		grp_mac_addr_changed = B_TRUE;
790 	}
791 
792 	(void) aggr_grp_detach_port(grp, port);
793 
794 	/*
795 	 * Add the statistics of the ports while it was aggregated
796 	 * to the group's residual statistics.
797 	 */
798 	for (i = 0; i < MAC_NSTAT && !grp->lg_closing; i++) {
799 		/* avoid stats that are not counters */
800 		if (i == MAC_STAT_IFSPEED || i == MAC_STAT_LINK_DUPLEX)
801 			continue;
802 
803 		/* get current value */
804 		val = aggr_port_stat(port, i);
805 		/* subtract value at the point of aggregation */
806 		val -= port->lp_stat[i];
807 		/* add to the residual stat */
808 		grp->lg_stat[i] += val;
809 	}
810 
811 	grp->lg_nports--;
812 
813 	rw_exit(&port->lp_lock);
814 
815 	aggr_port_delete(port);
816 
817 	/*
818 	 * If the group MAC address has changed, update the MAC address of
819 	 * the remaining consistuent ports according to the new MAC
820 	 * address of the group.
821 	 */
822 	if (grp->lg_closing) {
823 		*do_notify = B_FALSE;
824 	} else {
825 		if (grp_mac_addr_changed)
826 			aggr_grp_update_ports_mac(grp);
827 
828 		if (do_notify != NULL)
829 			*do_notify = grp_mac_addr_changed;
830 	}
831 
832 	return (0);
833 }
834 
835 /*
836  * Remove one or more ports from an existing link aggregation group.
837  */
838 int
839 aggr_grp_rem_ports(uint32_t key, uint_t nports, laioc_port_t *ports)
840 {
841 	int rc = 0, i;
842 	aggr_grp_t *grp = NULL;
843 	aggr_port_t *port;
844 	boolean_t notify = B_FALSE, grp_mac_addr_changed;
845 
846 	/* get group corresponding to key */
847 	rw_enter(&aggr_grp_lock, RW_READER);
848 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key),
849 	    (mod_hash_val_t *)&grp) != 0) {
850 		rw_exit(&aggr_grp_lock);
851 		return (ENOENT);
852 	}
853 	AGGR_GRP_REFHOLD(grp);
854 	rw_exit(&aggr_grp_lock);
855 
856 	AGGR_LACP_LOCK(grp);
857 	rw_enter(&grp->lg_lock, RW_WRITER);
858 
859 	/* we need to keep at least one port per group */
860 	if (nports >= grp->lg_nports) {
861 		rc = EINVAL;
862 		goto bail;
863 	}
864 
865 	/* first verify that all the groups are valid */
866 	for (i = 0; i < nports; i++) {
867 		if (aggr_grp_port_lookup(grp, ports[i].lp_devname,
868 		    ports[i].lp_port) == NULL) {
869 			/* port not found */
870 			rc = ENOENT;
871 			goto bail;
872 		}
873 	}
874 
875 	/* remove the specified ports from group */
876 	for (i = 0; i < nports && !grp->lg_closing; i++) {
877 		/* lookup port */
878 		port = aggr_grp_port_lookup(grp, ports[i].lp_devname,
879 		    ports[i].lp_port);
880 		ASSERT(port != NULL);
881 
882 		/* stop port if group has already been started */
883 		if (grp->lg_started) {
884 			rw_enter(&port->lp_lock, RW_WRITER);
885 			aggr_port_stop(port);
886 			rw_exit(&port->lp_lock);
887 		}
888 
889 		/* remove port from group */
890 		rc = aggr_grp_rem_port(grp, port, &grp_mac_addr_changed);
891 		ASSERT(rc == 0);
892 		notify = notify || grp_mac_addr_changed;
893 	}
894 
895 bail:
896 	rw_exit(&grp->lg_lock);
897 	AGGR_LACP_UNLOCK(grp);
898 	if (notify && !grp->lg_closing)
899 		mac_unicst_update(&grp->lg_mac, grp->lg_addr);
900 	if (rc == 0 && !grp->lg_closing)
901 		mac_resource_update(&grp->lg_mac);
902 	AGGR_GRP_REFRELE(grp);
903 
904 	return (rc);
905 }
906 
907 int
908 aggr_grp_delete(uint32_t key)
909 {
910 	aggr_grp_t *grp = NULL;
911 	aggr_port_t *port, *cport;
912 	mod_hash_val_t val;
913 
914 	rw_enter(&aggr_grp_lock, RW_WRITER);
915 
916 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(key),
917 	    (mod_hash_val_t *)&grp) != 0) {
918 		rw_exit(&aggr_grp_lock);
919 		return (ENOENT);
920 	}
921 	AGGR_LACP_LOCK(grp);
922 	rw_enter(&grp->lg_lock, RW_WRITER);
923 	grp->lg_closing = B_TRUE;
924 
925 	/*
926 	 * Unregister from the MAC service module. Since this can
927 	 * fail if a client hasn't closed the MAC port, we gracefully
928 	 * fail the operation.
929 	 */
930 	if (mac_unregister(&grp->lg_mac)) {
931 		rw_exit(&grp->lg_lock);
932 		AGGR_LACP_UNLOCK(grp);
933 		rw_exit(&aggr_grp_lock);
934 		return (EBUSY);
935 	}
936 
937 	/* detach and free MAC ports associated with group */
938 	port = grp->lg_ports;
939 	while (port != NULL) {
940 		cport = port->lp_next;
941 		rw_enter(&port->lp_lock, RW_WRITER);
942 		if (grp->lg_started)
943 			aggr_port_stop(port);
944 		(void) aggr_grp_detach_port(grp, port);
945 		rw_exit(&port->lp_lock);
946 		aggr_port_delete(port);
947 		port = cport;
948 	}
949 
950 	rw_exit(&grp->lg_lock);
951 	AGGR_LACP_UNLOCK(grp);
952 
953 	(void) mod_hash_remove(aggr_grp_hash, GRP_HASH_KEY(key), &val);
954 	ASSERT(grp == (aggr_grp_t *)val);
955 
956 	ASSERT(aggr_grp_cnt > 0);
957 	aggr_grp_cnt--;
958 
959 	rw_exit(&aggr_grp_lock);
960 	AGGR_GRP_REFRELE(grp);
961 
962 	return (0);
963 }
964 
965 void
966 aggr_grp_free(aggr_grp_t *grp)
967 {
968 	ASSERT(grp->lg_refs == 0);
969 	kmem_cache_free(aggr_grp_cache, grp);
970 }
971 
972 /*
973  * Walker invoked when building the list of configured groups and
974  * their ports that must be passed up to user-space.
975  */
976 
977 /*ARGSUSED*/
978 static uint_t
979 aggr_grp_info_walker(mod_hash_key_t key, mod_hash_val_t *val, void *arg)
980 {
981 	aggr_grp_t *grp;
982 	aggr_port_t *port;
983 	aggr_grp_info_state_t *state = arg;
984 
985 	if (state->ls_rc != 0)
986 		return (MH_WALK_TERMINATE);	/* terminate walk */
987 
988 	grp = (aggr_grp_t *)val;
989 
990 	rw_enter(&grp->lg_lock, RW_READER);
991 
992 	if (state->ls_group_key != 0 && grp->lg_key != state->ls_group_key)
993 		goto bail;
994 
995 	state->ls_group_found = B_TRUE;
996 
997 	state->ls_rc = state->ls_new_grp_fn(state->ls_fn_arg, grp->lg_key,
998 	    grp->lg_addr, grp->lg_addr_fixed, grp->lg_tx_policy,
999 	    grp->lg_nports, grp->lg_lacp_mode, grp->aggr.PeriodicTimer);
1000 
1001 	if (state->ls_rc != 0)
1002 		goto bail;
1003 
1004 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1005 
1006 		rw_enter(&port->lp_lock, RW_READER);
1007 
1008 		state->ls_rc = state->ls_new_port_fn(state->ls_fn_arg,
1009 		    port->lp_devname, port->lp_port, port->lp_addr,
1010 		    port->lp_state, &port->lp_lacp.ActorOperPortState);
1011 
1012 		rw_exit(&port->lp_lock);
1013 
1014 		if (state->ls_rc != 0)
1015 			goto bail;
1016 	}
1017 
1018 bail:
1019 	rw_exit(&grp->lg_lock);
1020 	return ((state->ls_rc == 0) ? MH_WALK_CONTINUE : MH_WALK_TERMINATE);
1021 }
1022 
1023 int
1024 aggr_grp_info(uint_t *ngroups, uint32_t group_key, void *fn_arg,
1025     aggr_grp_info_new_grp_fn_t new_grp_fn,
1026     aggr_grp_info_new_port_fn_t new_port_fn)
1027 {
1028 	aggr_grp_info_state_t state;
1029 	int rc = 0;
1030 
1031 	rw_enter(&aggr_grp_lock, RW_READER);
1032 
1033 	*ngroups = aggr_grp_cnt;
1034 
1035 	bzero(&state, sizeof (state));
1036 	state.ls_group_key = group_key;
1037 	state.ls_new_grp_fn = new_grp_fn;
1038 	state.ls_new_port_fn = new_port_fn;
1039 	state.ls_fn_arg = fn_arg;
1040 
1041 	mod_hash_walk(aggr_grp_hash, aggr_grp_info_walker, &state);
1042 
1043 	if ((rc = state.ls_rc) == 0 && group_key != 0 &&
1044 	    !state.ls_group_found)
1045 		rc = ENOENT;
1046 
1047 	rw_exit(&aggr_grp_lock);
1048 	return (rc);
1049 }
1050 
1051 static void
1052 aggr_m_resources(void *arg)
1053 {
1054 	aggr_grp_t *grp = arg;
1055 	aggr_port_t *port;
1056 
1057 	/* Call each port's m_resources function */
1058 	for (port = grp->lg_ports; port != NULL; port = port->lp_next)
1059 		mac_resources(port->lp_mh);
1060 }
1061 
1062 /*ARGSUSED*/
1063 static void
1064 aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
1065 {
1066 	miocnak(q, mp, 0, ENOTSUP);
1067 }
1068 
1069 static uint64_t
1070 aggr_m_stat(void *arg, enum mac_stat stat)
1071 {
1072 	aggr_grp_t *grp = arg;
1073 	aggr_port_t *port;
1074 	uint64_t val;
1075 
1076 	rw_enter(&grp->lg_lock, RW_READER);
1077 
1078 	switch (stat) {
1079 	case MAC_STAT_IFSPEED:
1080 		val = grp->lg_ifspeed;
1081 		break;
1082 	case MAC_STAT_LINK_DUPLEX:
1083 		val = grp->lg_link_duplex;
1084 		break;
1085 	default:
1086 		/*
1087 		 * The remaining statistics are counters. They are computed
1088 		 * by aggregating the counters of the members MACs while they
1089 		 * were aggregated, plus the residual counter of the group
1090 		 * itself, which is updated each time a MAC is removed from
1091 		 * the group.
1092 		 */
1093 		val = 0;
1094 		for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1095 			/* actual port statistic */
1096 			val += aggr_port_stat(port, stat);
1097 			/* minus the port stat when it was added */
1098 			val -= port->lp_stat[stat];
1099 			/* plus any residual amount for the group */
1100 			val += grp->lg_stat[stat];
1101 		}
1102 	}
1103 
1104 	rw_exit(&grp->lg_lock);
1105 	return (val);
1106 }
1107 
1108 static int
1109 aggr_m_start(void *arg)
1110 {
1111 	aggr_grp_t *grp = arg;
1112 	aggr_port_t *port;
1113 
1114 	AGGR_LACP_LOCK(grp);
1115 	rw_enter(&grp->lg_lock, RW_WRITER);
1116 
1117 	/*
1118 	 * Attempts to start all configured members of the group.
1119 	 * Group members will be attached when their link-up notification
1120 	 * is received.
1121 	 */
1122 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1123 		rw_enter(&port->lp_lock, RW_WRITER);
1124 		if (aggr_port_start(port) != 0) {
1125 			rw_exit(&port->lp_lock);
1126 			continue;
1127 		}
1128 
1129 		/* set port promiscuous mode */
1130 		if (aggr_port_promisc(port, grp->lg_promisc) != 0)
1131 			aggr_port_stop(port);
1132 		rw_exit(&port->lp_lock);
1133 	}
1134 
1135 	grp->lg_started = B_TRUE;
1136 
1137 	rw_exit(&grp->lg_lock);
1138 	AGGR_LACP_UNLOCK(grp);
1139 
1140 	return (0);
1141 }
1142 
1143 static void
1144 aggr_m_stop(void *arg)
1145 {
1146 	aggr_grp_t *grp = arg;
1147 	aggr_port_t *port;
1148 
1149 	rw_enter(&grp->lg_lock, RW_WRITER);
1150 
1151 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1152 		rw_enter(&port->lp_lock, RW_WRITER);
1153 		aggr_port_stop(port);
1154 		rw_exit(&port->lp_lock);
1155 	}
1156 
1157 	grp->lg_started = B_FALSE;
1158 
1159 	rw_exit(&grp->lg_lock);
1160 }
1161 
1162 static int
1163 aggr_m_promisc(void *arg, boolean_t on)
1164 {
1165 	aggr_grp_t *grp = arg;
1166 	aggr_port_t *port;
1167 
1168 	AGGR_LACP_LOCK(grp);
1169 	rw_enter(&grp->lg_lock, RW_WRITER);
1170 	AGGR_GRP_REFHOLD(grp);
1171 
1172 	if (on == grp->lg_promisc)
1173 		goto bail;
1174 
1175 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1176 		rw_enter(&port->lp_lock, RW_WRITER);
1177 		AGGR_PORT_REFHOLD(port);
1178 		if (port->lp_started) {
1179 			if (aggr_port_promisc(port, on) != 0)
1180 				(void) aggr_grp_detach_port(grp, port);
1181 		}
1182 		rw_exit(&port->lp_lock);
1183 		AGGR_PORT_REFRELE(port);
1184 		if (grp->lg_closing)
1185 			break;
1186 	}
1187 
1188 	grp->lg_promisc = on;
1189 
1190 bail:
1191 	rw_exit(&grp->lg_lock);
1192 	AGGR_LACP_UNLOCK(grp);
1193 	AGGR_GRP_REFRELE(grp);
1194 
1195 	return (0);
1196 }
1197 
1198 /*
1199  * Add or remove the multicast addresses that are defined for the group
1200  * to or from the specified port.
1201  * This function is called before stopping a port, before a port
1202  * is detached from a group, and when attaching a port to a group.
1203  */
1204 void
1205 aggr_grp_multicst_port(aggr_port_t *port, boolean_t add)
1206 {
1207 	aggr_grp_t *grp = port->lp_grp;
1208 
1209 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
1210 	ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock));
1211 
1212 	if (!port->lp_started)
1213 		return;
1214 
1215 	mac_multicst_refresh(&grp->lg_mac, aggr_port_multicst, port,
1216 	    add);
1217 }
1218 
1219 static int
1220 aggr_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
1221 {
1222 	aggr_grp_t *grp = arg;
1223 	aggr_port_t *port = NULL;
1224 	int err = 0, cerr;
1225 
1226 	rw_enter(&grp->lg_lock, RW_WRITER);
1227 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1228 		if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
1229 			continue;
1230 		cerr = aggr_port_multicst(port, add, addrp);
1231 		if (cerr != 0 && err == 0)
1232 			err = cerr;
1233 	}
1234 	rw_exit(&grp->lg_lock);
1235 	return (err);
1236 }
1237 
1238 static int
1239 aggr_m_unicst(void *arg, const uint8_t *macaddr)
1240 {
1241 	aggr_grp_t *grp = arg;
1242 	int rc;
1243 
1244 	AGGR_LACP_LOCK(grp);
1245 	rw_enter(&grp->lg_lock, RW_WRITER);
1246 	rc = aggr_grp_modify(0, grp, AGGR_MODIFY_MAC, 0, B_TRUE, macaddr,
1247 	    0, 0);
1248 	rw_exit(&grp->lg_lock);
1249 	AGGR_LACP_UNLOCK(grp);
1250 
1251 	return (rc);
1252 }
1253 
1254 /*
1255  * Initialize the capabilities that are advertised for the group
1256  * according to the capabilities of the constituent ports.
1257  */
1258 static void
1259 aggr_grp_capab_set(aggr_grp_t *grp)
1260 {
1261 	uint32_t cksum = (uint32_t)-1;
1262 	uint32_t poll = DL_CAPAB_POLL;
1263 	aggr_port_t *port;
1264 	const mac_info_t *port_mi;
1265 
1266 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
1267 
1268 	ASSERT(grp->lg_ports != NULL);
1269 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1270 		port_mi = mac_info(port->lp_mh);
1271 		cksum &= port_mi->mi_cksum;
1272 		poll &= port_mi->mi_poll;
1273 	}
1274 
1275 	grp->lg_mac.m_info.mi_cksum = cksum;
1276 	grp->lg_mac.m_info.mi_poll = poll;
1277 }
1278 
1279 /*
1280  * Checks whether the capabilities of the ports being added are compatible
1281  * with the current capabilities of the aggregation.
1282  */
1283 static boolean_t
1284 aggr_grp_capab_check(aggr_grp_t *grp, aggr_port_t *port)
1285 {
1286 	const mac_info_t *port_mi = mac_info(port->lp_mh);
1287 	uint32_t grp_cksum = grp->lg_mac.m_info.mi_cksum;
1288 
1289 	ASSERT(grp->lg_ports != NULL);
1290 
1291 	return (((grp_cksum & port_mi->mi_cksum) == grp_cksum) &&
1292 	    (grp->lg_mac.m_info.mi_poll == port_mi->mi_poll));
1293 }
1294