xref: /titanic_50/usr/src/uts/common/io/aggr/aggr_grp.c (revision 554ff184129088135ad2643c1c9832174a17be88)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups.
31  *
32  * An instance of the structure aggr_grp_t is allocated for each
33  * link aggregation group. When created, aggr_grp_t objects are
34  * entered into the aggr_grp_hash hash table maintained by the GHT
35  * module. The hash key is the port number associated with the link
36  * aggregation group. The port number associated with a group corresponds
 * to the key associated with the group.
38  *
 * A set of MAC ports is associated with each aggregation group.
40  */
41 
42 #include <sys/types.h>
43 #include <sys/sysmacros.h>
44 #include <sys/conf.h>
45 #include <sys/cmn_err.h>
46 #include <sys/list.h>
47 #include <sys/ksynch.h>
48 #include <sys/kmem.h>
49 #include <sys/stream.h>
50 #include <sys/modctl.h>
51 #include <sys/ddi.h>
52 #include <sys/sunddi.h>
53 #include <sys/atomic.h>
54 #include <sys/stat.h>
55 #include <sys/ght.h>
56 #include <sys/strsun.h>
57 #include <sys/dlpi.h>
58 
59 #include <sys/aggr.h>
60 #include <sys/aggr_impl.h>
61 
/*
 * MAC entry points implemented by the aggregation driver. Most of
 * them are installed in the group's mac_t by aggr_grp_create().
 */
static void aggr_m_info(void *, mac_info_t *);
static int aggr_m_start(void *);
static void aggr_m_stop(void *);
static int aggr_m_promisc(void *, boolean_t);
static int aggr_m_multicst(void *, boolean_t, const uint8_t *);
static int aggr_m_unicst(void *, const uint8_t *);
static uint64_t aggr_m_stat(void *, enum mac_stat);
static void aggr_m_resources(void *);
static void aggr_m_ioctl(void *, queue_t *, mblk_t *);

/* Helpers private to this file. */
static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, const char *, uint32_t);
static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *);
static void aggr_stats_op(enum mac_stat, uint64_t *, uint64_t *, boolean_t);
static void aggr_grp_capab_set(aggr_grp_t *);
static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *);
77 
/* kmem cache from which aggr_grp_t objects are allocated */
static kmem_cache_t *aggr_grp_cache;
/* hash table of the existing groups, keyed by group key */
static ght_t aggr_grp_hash;

/* size passed to ght_scalar_create() when creating aggr_grp_hash */
#define	GRP_HASHSZ		64

/* all-zero address; not accepted as an administrator-supplied address */
static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0};
/* broadcast address copied into the mac_info_t of each group */
static uchar_t aggr_brdcst_mac[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
85 
/*
 * State shared between aggr_grp_info() and aggr_grp_info_walker()
 * while the group hash table is being walked.
 */
typedef struct aggr_grp_info_state {
	uint32_t	ls_group_key;	/* key to report; 0 matches any group */
	boolean_t	ls_group_found;	/* set when a matching group is seen */
	aggr_grp_info_new_grp_fn_t ls_new_grp_fn;	/* called per group */
	aggr_grp_info_new_port_fn_t ls_new_port_fn;	/* called per port */
	void		*ls_fn_arg;	/* first argument of both callbacks */
	int		ls_rc;		/* last callback result; != 0 ends walk */
} aggr_grp_info_state_t;
95 
96 /*ARGSUSED*/
97 static int
98 aggr_grp_constructor(void *buf, void *arg, int kmflag)
99 {
100 	aggr_grp_t *grp = buf;
101 
102 	bzero(grp, sizeof (*grp));
103 	rw_init(&grp->lg_lock, NULL, RW_DRIVER, NULL);
104 	mutex_init(&grp->aggr.gl_lock, NULL, MUTEX_DEFAULT, NULL);
105 
106 	grp->lg_link_state = LINK_STATE_UNKNOWN;
107 
108 	return (0);
109 }
110 
111 /*ARGSUSED*/
112 static void
113 aggr_grp_destructor(void *buf, void *arg)
114 {
115 	aggr_grp_t *grp = buf;
116 
117 	if (grp->lg_tx_ports != NULL) {
118 		kmem_free(grp->lg_tx_ports,
119 		    grp->lg_tx_ports_size * sizeof (aggr_port_t *));
120 	}
121 
122 	mutex_destroy(&grp->aggr.gl_lock);
123 	rw_destroy(&grp->lg_lock);
124 }
125 
126 void
127 aggr_grp_init(void)
128 {
129 	int err;
130 
131 	aggr_grp_cache = kmem_cache_create("aggr_grp_cache",
132 	    sizeof (aggr_grp_t), 0, aggr_grp_constructor,
133 	    aggr_grp_destructor, NULL, NULL, NULL, 0);
134 
135 	err = ght_scalar_create("aggr_grp_hash", GRP_HASHSZ,
136 	    &aggr_grp_hash);
137 	ASSERT(err == 0);
138 }
139 
140 int
141 aggr_grp_fini(void)
142 {
143 	int err;
144 
145 	if ((err = ght_destroy(aggr_grp_hash)) != 0)
146 		return (err);
147 	kmem_cache_destroy(aggr_grp_cache);
148 	return (0);
149 }
150 
151 /*
152  * Attach a port to a link aggregation group.
153  *
154  * A port is attached to a link aggregation group once its speed
155  * and link state have been verified.
156  *
157  * Returns B_TRUE if the group link state or speed has changed. If
158  * it's the case, the caller must notify the MAC layer via a call
159  * to mac_link().
160  */
161 boolean_t
162 aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port)
163 {
164 	boolean_t link_changed = B_FALSE;
165 
166 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
167 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
168 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
169 
170 	if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
171 		return (B_FALSE);
172 
173 	/*
174 	 * Validate the MAC port link speed and update the group
175 	 * link speed if needed.
176 	 */
177 	if (port->lp_ifspeed == 0 ||
178 	    port->lp_link_state != LINK_STATE_UP ||
179 	    port->lp_link_duplex != LINK_DUPLEX_FULL) {
180 		/*
181 		 * Can't attach a MAC port with unknown link speed,
182 		 * down link, or not in full duplex mode.
183 		 */
184 		return (B_FALSE);
185 	}
186 
187 	if (grp->lg_ifspeed == 0) {
188 		/*
189 		 * The group inherits the speed of the first link being
190 		 * attached.
191 		 */
192 		grp->lg_ifspeed = port->lp_ifspeed;
193 		link_changed = B_TRUE;
194 	} else if (grp->lg_ifspeed != port->lp_ifspeed) {
195 		/*
196 		 * The link speed of the MAC port must be the same as
197 		 * the group link speed, as per 802.3ad. Since it is
198 		 * not, the attach is cancelled.
199 		 */
200 		return (B_FALSE);
201 	}
202 
203 	grp->lg_nattached_ports++;
204 
205 	/*
206 	 * Update the group link state.
207 	 */
208 	if (grp->lg_link_state != LINK_STATE_UP) {
209 		grp->lg_link_state = LINK_STATE_UP;
210 		grp->lg_link_duplex = LINK_DUPLEX_FULL;
211 		link_changed = B_TRUE;
212 	}
213 
214 	aggr_grp_multicst_port(port, B_TRUE);
215 
216 	/*
217 	 * Update port's state.
218 	 */
219 	port->lp_state = AGGR_PORT_STATE_ATTACHED;
220 
221 	/*
222 	 * If LACP is OFF, the port can be used to send data as soon
223 	 * as its link is up and verified to be compatible with the
224 	 * aggregation.
225 	 *
226 	 * If LACP is active or passive, notify the LACP subsystem, which
227 	 * will enable sending on the port following the LACP protocol.
228 	 */
229 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
230 		aggr_send_port_enable(port);
231 	else
232 		aggr_lacp_port_attached(port);
233 
234 	return (link_changed);
235 }
236 
237 boolean_t
238 aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port)
239 {
240 	boolean_t link_changed = B_FALSE;
241 
242 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
243 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
244 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
245 
246 	/* update state */
247 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
248 		return (B_FALSE);
249 	port->lp_state = AGGR_PORT_STATE_STANDBY;
250 
251 	aggr_grp_multicst_port(port, B_FALSE);
252 
253 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
254 		aggr_send_port_disable(port);
255 	else
256 		aggr_lacp_port_detached(port);
257 
258 	grp->lg_nattached_ports--;
259 	if (grp->lg_nattached_ports == 0) {
260 		/* the last attached MAC port of the group is being detached */
261 		grp->lg_ifspeed = 0;
262 		grp->lg_link_state = LINK_STATE_DOWN;
263 		grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
264 		link_changed = B_TRUE;
265 	}
266 
267 	return (link_changed);
268 }
269 
270 /*
271  * Update the MAC addresses of the constituent ports of the specified
272  * group. This function is invoked:
273  * - after creating a new aggregation group.
274  * - after adding new ports to an aggregation group.
275  * - after removing a port from a group when the MAC address of
276  *   that port was used for the MAC address of the group.
277  * - after the MAC address of a port changed when the MAC address
278  *   of that port was used for the MAC address of the group.
279  */
280 void
281 aggr_grp_update_ports_mac(aggr_grp_t *grp)
282 {
283 	aggr_port_t *cport;
284 
285 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
286 
287 	for (cport = grp->lg_ports; cport != NULL;
288 	    cport = cport->lp_next) {
289 		rw_enter(&cport->lp_lock, RW_WRITER);
290 		if (aggr_port_unicst(cport, grp->lg_addr) != 0)
291 			(void) aggr_grp_detach_port(grp, cport);
292 		rw_exit(&cport->lp_lock);
293 		if (grp->lg_closing)
294 			break;
295 	}
296 }
297 
298 /*
299  * Invoked when the MAC address of a port has changed. If the port's
300  * MAC address was used for the group MAC address, returns B_TRUE.
301  * In that case, it is the responsibility of the caller to
302  * invoke aggr_grp_update_ports_mac() after releasing the
303  * the port lock, and aggr_grp_notify() after releasing the
304  * group lock.
305  */
306 boolean_t
307 aggr_grp_port_mac_changed(aggr_grp_t *grp, aggr_port_t *port)
308 {
309 	boolean_t grp_addr_changed = B_FALSE;
310 
311 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
312 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
313 
314 	if (grp->lg_addr_fixed) {
315 		/*
316 		 * The group is using a fixed MAC address or an automatic
317 		 * MAC address has not been set.
318 		 */
319 		return (B_FALSE);
320 	}
321 
322 	if (grp->lg_mac_addr_port == port) {
323 		/*
324 		 * The MAC address of the port was assigned to the group
325 		 * MAC address. Update the group MAC address.
326 		 */
327 		bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
328 		grp_addr_changed = B_TRUE;
329 	} else {
330 		/*
331 		 * Update the actual port MAC address to the MAC address
332 		 * of the group.
333 		 */
334 		if (aggr_port_unicst(port, grp->lg_addr) != 0)
335 			(void) aggr_grp_detach_port(grp, port);
336 	}
337 
338 	return (grp_addr_changed);
339 }
340 
341 /*
342  * Add a port to a link aggregation group.
343  */
344 static int
345 aggr_grp_add_port(aggr_grp_t *grp, const char *name, uint_t portnum,
346     aggr_port_t **pp)
347 {
348 	aggr_port_t *port, **cport;
349 	int err;
350 
351 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
352 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
353 
354 	/* create new port */
355 	err = aggr_port_create(name, portnum, &port);
356 	if (err != 0)
357 		return (err);
358 
359 	rw_enter(&port->lp_lock, RW_WRITER);
360 
361 	/* add port to list of group constituent ports */
362 	cport = &grp->lg_ports;
363 	while (*cport != NULL)
364 		cport = &((*cport)->lp_next);
365 	*cport = port;
366 
367 	/*
368 	 * Back reference to the group it is member of. A port always
369 	 * holds a reference to its group to ensure that the back
370 	 * reference is always valid.
371 	 */
372 	port->lp_grp = grp;
373 	AGGR_GRP_REFHOLD(grp);
374 	grp->lg_nports++;
375 
376 	aggr_lacp_init_port(port);
377 
378 	rw_exit(&port->lp_lock);
379 
380 	if (pp != NULL)
381 		*pp = port;
382 
383 	return (0);
384 }
385 
386 /*
387  * Add one or more ports to an existing link aggregation group.
388  */
389 int
390 aggr_grp_add_ports(uint32_t key, uint_t nports, laioc_port_t *ports)
391 {
392 	int rc, i, nadded = 0;
393 	ghte_t hte;
394 	aggr_grp_t *grp = NULL;
395 	aggr_port_t *port;
396 
397 	/* get group corresponding to key */
398 	ght_lock(aggr_grp_hash, GHT_READ);
399 	if ((rc = ght_find(aggr_grp_hash, GHT_SCALAR_TO_KEY(key),
400 	    &hte)) == ENOENT) {
401 		ght_unlock(aggr_grp_hash);
402 		return (rc);
403 	}
404 	ASSERT(rc == 0);
405 	grp = (aggr_grp_t *)GHT_VAL(hte);
406 	AGGR_GRP_REFHOLD(grp);
407 	ght_unlock(aggr_grp_hash);
408 
409 	AGGR_LACP_LOCK(grp);
410 	rw_enter(&grp->lg_lock, RW_WRITER);
411 
412 	/* add the specified ports to group */
413 	for (i = 0; i < nports; i++) {
414 		/* add port to group */
415 		if ((rc = aggr_grp_add_port(grp, ports[i].lp_devname,
416 		    ports[i].lp_port, &port)) != 0)
417 			goto bail;
418 		ASSERT(port != NULL);
419 		nadded++;
420 
421 		/* check capabilities */
422 		if (!aggr_grp_capab_check(grp, port)) {
423 			rc = ENOTSUP;
424 			goto bail;
425 		}
426 
427 		/* start port if group has already been started */
428 		if (grp->lg_started) {
429 			rw_enter(&port->lp_lock, RW_WRITER);
430 			rc = aggr_port_start(port);
431 			if (rc != 0) {
432 				rw_exit(&port->lp_lock);
433 				goto bail;
434 			}
435 
436 			/* set port promiscuous mode */
437 			rc = aggr_port_promisc(port, grp->lg_promisc);
438 			if (rc != 0) {
439 				rw_exit(&port->lp_lock);
440 				goto bail;
441 			}
442 			rw_exit(&port->lp_lock);
443 		}
444 	}
445 
446 	/* update the MAC address of the constituent ports */
447 	aggr_grp_update_ports_mac(grp);
448 
449 bail:
450 	if (rc != 0) {
451 		/* stop and remove ports that have been added */
452 		for (i = 0; i < nadded && !grp->lg_closing; i++) {
453 			port = aggr_grp_port_lookup(grp, ports[i].lp_devname,
454 			    ports[i].lp_port);
455 			ASSERT(port != NULL);
456 			if (grp->lg_started) {
457 				rw_enter(&port->lp_lock, RW_WRITER);
458 				aggr_port_stop(port);
459 				rw_exit(&port->lp_lock);
460 			}
461 			(void) aggr_grp_rem_port(grp, port, NULL);
462 		}
463 	}
464 
465 	rw_exit(&grp->lg_lock);
466 	AGGR_LACP_UNLOCK(grp);
467 	if (rc == 0 && !grp->lg_closing)
468 		mac_resource_update(&grp->lg_mac);
469 	AGGR_GRP_REFRELE(grp);
470 	return (rc);
471 }
472 
473 /*
474  * Update properties of an existing link aggregation group.
475  */
476 int
477 aggr_grp_modify(uint32_t key, aggr_grp_t *grp_arg, uint8_t update_mask,
478     uint32_t policy, boolean_t mac_fixed, const uchar_t *mac_addr,
479     aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer)
480 {
481 	int rc = 0;
482 	ghte_t hte;
483 	aggr_grp_t *grp = NULL;
484 	boolean_t mac_addr_changed = B_FALSE;
485 
486 	if (grp_arg == NULL) {
487 		/* get group corresponding to key */
488 		ght_lock(aggr_grp_hash, GHT_READ);
489 		if ((rc = ght_find(aggr_grp_hash, GHT_SCALAR_TO_KEY(key),
490 		    &hte)) == ENOENT)
491 			goto bail;
492 		ASSERT(rc == 0);
493 		grp = (aggr_grp_t *)GHT_VAL(hte);
494 		AGGR_LACP_LOCK(grp);
495 		rw_enter(&grp->lg_lock, RW_WRITER);
496 	} else {
497 		grp = grp_arg;
498 		ASSERT(AGGR_LACP_LOCK_HELD(grp));
499 		ASSERT(RW_WRITE_HELD(&grp->lg_lock));
500 	}
501 
502 	ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock));
503 	AGGR_GRP_REFHOLD(grp);
504 
505 	/* validate fixed address if specified */
506 	if ((update_mask & AGGR_MODIFY_MAC) && mac_fixed &&
507 	    ((bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) ||
508 	    (mac_addr[0] & 0x01))) {
509 		rc = EINVAL;
510 		goto bail;
511 	}
512 
513 	/* update policy if requested */
514 	if (update_mask & AGGR_MODIFY_POLICY)
515 		aggr_send_update_policy(grp, policy);
516 
517 	/* update unicast MAC address if requested */
518 	if (update_mask & AGGR_MODIFY_MAC) {
519 		if (mac_fixed) {
520 			/* user-supplied MAC address */
521 			grp->lg_mac_addr_port = NULL;
522 			if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) != 0) {
523 				bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
524 				mac_addr_changed = B_TRUE;
525 			}
526 		} else if (grp->lg_addr_fixed) {
527 			/* switch from user-supplied to automatic */
528 			aggr_port_t *port = grp->lg_ports;
529 
530 			rw_enter(&port->lp_lock, RW_WRITER);
531 			bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
532 			grp->lg_mac_addr_port = port;
533 			mac_addr_changed = B_TRUE;
534 			rw_exit(&port->lp_lock);
535 		}
536 		grp->lg_addr_fixed = mac_fixed;
537 	}
538 
539 	if (mac_addr_changed)
540 		aggr_grp_update_ports_mac(grp);
541 
542 	if (update_mask & AGGR_MODIFY_LACP_MODE)
543 		aggr_lacp_update_mode(grp, lacp_mode);
544 
545 	if ((update_mask & AGGR_MODIFY_LACP_TIMER) && !grp->lg_closing)
546 		aggr_lacp_update_timer(grp, lacp_timer);
547 
548 bail:
549 	if (grp_arg == NULL) {
550 		if (grp != NULL) {
551 			rw_exit(&grp->lg_lock);
552 			AGGR_LACP_UNLOCK(grp);
553 		}
554 		ght_unlock(aggr_grp_hash);
555 		/* pass new unicast address up to MAC layer */
556 		if (grp != NULL && mac_addr_changed && !grp->lg_closing)
557 			mac_unicst_update(&grp->lg_mac, grp->lg_addr);
558 	}
559 
560 	if (grp != NULL)
561 		AGGR_GRP_REFRELE(grp);
562 
563 	return (rc);
564 }
565 
566 /*
567  * Create a new link aggregation group upon request from administrator.
568  * Returns 0 on success, an errno on failure.
569  */
570 int
571 aggr_grp_create(uint32_t key, uint_t nports, laioc_port_t *ports,
572     uint32_t policy, boolean_t mac_fixed, uchar_t *mac_addr,
573     aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer)
574 {
575 	aggr_grp_t *grp = NULL;
576 	aggr_port_t *port;
577 	ghte_t hte;
578 	mac_t *mac;
579 	mac_info_t *mip;
580 	int err;
581 	int i;
582 
583 	/* need at least one port */
584 	if (nports == 0)
585 		return (EINVAL);
586 
587 	ght_lock(aggr_grp_hash, GHT_WRITE);
588 
589 	/* does a group with the same key already exist? */
590 	err = ght_find(aggr_grp_hash, GHT_SCALAR_TO_KEY(key), &hte);
591 	if (err != ENOENT) {
592 		ght_unlock(aggr_grp_hash);
593 		return (EEXIST);
594 	}
595 
596 	grp = kmem_cache_alloc(aggr_grp_cache, KM_SLEEP);
597 
598 	AGGR_LACP_LOCK(grp);
599 	rw_enter(&grp->lg_lock, RW_WRITER);
600 
601 	grp->lg_refs = 1;
602 	grp->lg_closing = B_FALSE;
603 	grp->lg_key = key;
604 
605 	grp->lg_ifspeed = 0;
606 	grp->lg_link_state = LINK_STATE_UNKNOWN;
607 	grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
608 	grp->lg_started = B_FALSE;
609 	grp->lg_promisc = B_FALSE;
610 	aggr_lacp_init_grp(grp);
611 
612 	/* add MAC ports to group */
613 	grp->lg_ports = NULL;
614 	grp->lg_nports = 0;
615 	grp->lg_nattached_ports = 0;
616 	grp->lg_ntx_ports = 0;
617 
618 	for (i = 0; i < nports; i++) {
619 		err = aggr_grp_add_port(grp, ports[i].lp_devname,
620 		    ports[i].lp_port, NULL);
621 		if (err != 0)
622 			goto bail;
623 	}
624 
625 	/*
626 	 * If no explicit MAC address was specified by the administrator,
627 	 * set it to the MAC address of the first port.
628 	 */
629 	grp->lg_addr_fixed = mac_fixed;
630 	if (grp->lg_addr_fixed) {
631 		/* validate specified address */
632 		if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) {
633 			err = EINVAL;
634 			goto bail;
635 		}
636 		bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
637 	} else {
638 		bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
639 		grp->lg_mac_addr_port = grp->lg_ports;
640 	}
641 
642 	/* update the MAC address of the constituent ports */
643 	aggr_grp_update_ports_mac(grp);
644 
645 	/* update outbound load balancing policy */
646 	aggr_send_update_policy(grp, policy);
647 
648 	/* register with the MAC module */
649 	mac = &grp->lg_mac;
650 	bzero(mac, sizeof (*mac));
651 
652 	mac->m_ident = MAC_IDENT;
653 
654 	mac->m_driver = grp;
655 	mac->m_dip = aggr_dip;
656 	mac->m_port = key;
657 
658 	mip = &(mac->m_info);
659 	mip->mi_media = DL_ETHER;
660 	mip->mi_sdu_min = 0;
661 	mip->mi_sdu_max = ETHERMTU;
662 
663 	MAC_STAT_MIB(mip->mi_stat);
664 	MAC_STAT_ETHER(mip->mi_stat);
665 	mip->mi_stat[MAC_STAT_LINK_DUPLEX] = B_TRUE;
666 
667 	mip->mi_addr_length = ETHERADDRL;
668 	bcopy(aggr_brdcst_mac, mip->mi_brdcst_addr, ETHERADDRL);
669 	bcopy(grp->lg_addr, mip->mi_unicst_addr, ETHERADDRL);
670 
671 	mac->m_stat = aggr_m_stat;
672 	mac->m_start = aggr_m_start;
673 	mac->m_stop = aggr_m_stop;
674 	mac->m_promisc = aggr_m_promisc;
675 	mac->m_multicst = aggr_m_multicst;
676 	mac->m_unicst = aggr_m_unicst;
677 	mac->m_tx = aggr_m_tx;
678 	mac->m_resources = aggr_m_resources;
679 	mac->m_ioctl = aggr_m_ioctl;
680 
681 	/* set the initial group capabilities */
682 	aggr_grp_capab_set(grp);
683 
684 	if ((err = mac_register(mac)) != 0)
685 		goto bail;
686 
687 	/* set LACP mode */
688 	aggr_lacp_set_mode(grp, lacp_mode, lacp_timer);
689 
690 	/* add new group to hash table */
691 	hte = ght_alloc(aggr_grp_hash, KM_SLEEP);
692 	GHT_KEY(hte) = GHT_SCALAR_TO_KEY(key);
693 	GHT_VAL(hte) = GHT_PTR_TO_VAL(grp);
694 	grp->lg_hte = hte;
695 
696 	err = ght_insert(hte);
697 	ASSERT(err == 0);
698 
699 	rw_exit(&grp->lg_lock);
700 	AGGR_LACP_UNLOCK(grp);
701 	ght_unlock(aggr_grp_hash);
702 
703 	return (0);
704 
705 bail:
706 	if (grp != NULL) {
707 		aggr_port_t *cport;
708 
709 		port = grp->lg_ports;
710 		while (port != NULL) {
711 			cport = port->lp_next;
712 			aggr_port_delete(port);
713 			port = cport;
714 		}
715 
716 		rw_exit(&grp->lg_lock);
717 		AGGR_LACP_UNLOCK(grp);
718 
719 		kmem_cache_free(aggr_grp_cache, grp);
720 	}
721 
722 	ght_unlock(aggr_grp_hash);
723 	return (err);
724 }
725 
726 /*
727  * Return a pointer to the member of a group with specified device name
728  * and port number.
729  */
730 static aggr_port_t *
731 aggr_grp_port_lookup(aggr_grp_t *grp, const char *devname, uint32_t portnum)
732 {
733 	aggr_port_t *port;
734 
735 	ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock));
736 
737 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
738 		if ((strcmp(port->lp_devname, devname) == 0) &&
739 		    (port->lp_port == portnum))
740 			break;
741 	}
742 
743 	return (port);
744 }
745 
746 /*
747  * Stop, detach and remove a port from a link aggregation group.
748  */
749 static int
750 aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port, boolean_t *do_notify)
751 {
752 	aggr_port_t **pport;
753 	boolean_t grp_mac_addr_changed = B_FALSE;
754 	uint64_t val;
755 	uint_t i;
756 
757 	ASSERT(AGGR_LACP_LOCK_HELD(grp));
758 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
759 	ASSERT(grp->lg_nports > 1);
760 
761 	if (do_notify != NULL)
762 		*do_notify = B_FALSE;
763 
764 	/* unlink port */
765 	for (pport = &grp->lg_ports; *pport != port;
766 	    pport = &(*pport)->lp_next) {
767 		if (*pport == NULL)
768 			return (ENOENT);
769 	}
770 	*pport = port->lp_next;
771 
772 	rw_enter(&port->lp_lock, RW_WRITER);
773 	port->lp_closing = B_TRUE;
774 
775 	/*
776 	 * If the MAC address of the port being removed was assigned
777 	 * to the group, update the group MAC address
778 	 * using the MAC address of a different port.
779 	 */
780 	if (!grp->lg_addr_fixed && grp->lg_mac_addr_port == port) {
781 		/*
782 		 * Set the MAC address of the group to the
783 		 * MAC address of its first port.
784 		 */
785 		bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
786 		grp->lg_mac_addr_port = grp->lg_ports;
787 		grp_mac_addr_changed = B_TRUE;
788 	}
789 
790 	(void) aggr_grp_detach_port(grp, port);
791 
792 	/*
793 	 * Add the statistics of the ports while it was aggregated
794 	 * to the group's residual statistics.
795 	 */
796 	for (i = 0; i < MAC_NSTAT && !grp->lg_closing; i++) {
797 		/* avoid stats that are not counters */
798 		if (i == MAC_STAT_IFSPEED || i == MAC_STAT_LINK_DUPLEX)
799 			continue;
800 
801 		/* get current value */
802 		val = aggr_port_stat(port, i);
803 		/* subtract value at the point of aggregation */
804 		val -= port->lp_stat[i];
805 		/* add to the residual stat */
806 		grp->lg_stat[i] += val;
807 	}
808 
809 	grp->lg_nports--;
810 
811 	rw_exit(&port->lp_lock);
812 
813 	aggr_port_delete(port);
814 
815 	/*
816 	 * If the group MAC address has changed, update the MAC address of
817 	 * the remaining consistuent ports according to the new MAC
818 	 * address of the group.
819 	 */
820 	if (grp->lg_closing) {
821 		*do_notify = B_FALSE;
822 	} else {
823 		if (grp_mac_addr_changed)
824 			aggr_grp_update_ports_mac(grp);
825 
826 		if (do_notify != NULL)
827 			*do_notify = grp_mac_addr_changed;
828 	}
829 
830 	return (0);
831 }
832 
833 /*
834  * Remove one or more ports from an existing link aggregation group.
835  */
836 int
837 aggr_grp_rem_ports(uint32_t key, uint_t nports, laioc_port_t *ports)
838 {
839 	int rc = 0, i;
840 	ghte_t hte;
841 	aggr_grp_t *grp = NULL;
842 	aggr_port_t *port;
843 	boolean_t notify = B_FALSE, grp_mac_addr_changed;
844 
845 	/* get group corresponding to key */
846 	ght_lock(aggr_grp_hash, GHT_READ);
847 	if ((rc = ght_find(aggr_grp_hash, GHT_SCALAR_TO_KEY(key),
848 	    &hte)) == ENOENT) {
849 		ght_unlock(aggr_grp_hash);
850 		return (rc);
851 	}
852 	ASSERT(rc == 0);
853 	grp = (aggr_grp_t *)GHT_VAL(hte);
854 	AGGR_GRP_REFHOLD(grp);
855 	ght_unlock(aggr_grp_hash);
856 	AGGR_LACP_LOCK(grp);
857 	rw_enter(&grp->lg_lock, RW_WRITER);
858 
859 	/* we need to keep at least one port per group */
860 	if (nports >= grp->lg_nports) {
861 		rc = EINVAL;
862 		goto bail;
863 	}
864 
865 	/* first verify that all the groups are valid */
866 	for (i = 0; i < nports; i++) {
867 		if (aggr_grp_port_lookup(grp, ports[i].lp_devname,
868 		    ports[i].lp_port) == NULL) {
869 			/* port not found */
870 			rc = ENOENT;
871 			goto bail;
872 		}
873 	}
874 
875 	/* remove the specified ports from group */
876 	for (i = 0; i < nports && !grp->lg_closing; i++) {
877 		/* lookup port */
878 		port = aggr_grp_port_lookup(grp, ports[i].lp_devname,
879 		    ports[i].lp_port);
880 		ASSERT(port != NULL);
881 
882 		/* stop port if group has already been started */
883 		if (grp->lg_started) {
884 			rw_enter(&port->lp_lock, RW_WRITER);
885 			aggr_port_stop(port);
886 			rw_exit(&port->lp_lock);
887 		}
888 
889 		/* remove port from group */
890 		rc = aggr_grp_rem_port(grp, port, &grp_mac_addr_changed);
891 		ASSERT(rc == 0);
892 		notify = notify || grp_mac_addr_changed;
893 	}
894 
895 bail:
896 	rw_exit(&grp->lg_lock);
897 	AGGR_LACP_UNLOCK(grp);
898 	if (notify && !grp->lg_closing)
899 		mac_unicst_update(&grp->lg_mac, grp->lg_addr);
900 	if (rc == 0 && !grp->lg_closing)
901 		mac_resource_update(&grp->lg_mac);
902 	AGGR_GRP_REFRELE(grp);
903 
904 	return (rc);
905 }
906 
907 int
908 aggr_grp_delete(uint32_t key)
909 {
910 	int err;
911 	ghte_t hte;
912 	aggr_grp_t *grp;
913 	aggr_port_t *port, *cport;
914 
915 	ght_lock(aggr_grp_hash, GHT_WRITE);
916 
917 	err = ght_find(aggr_grp_hash, GHT_SCALAR_TO_KEY(key), &hte);
918 	if (err == ENOENT) {
919 		ght_unlock(aggr_grp_hash);
920 		return (err);
921 	}
922 	ASSERT(err == 0);
923 
924 	grp = (aggr_grp_t *)GHT_VAL(hte);
925 
926 	AGGR_LACP_LOCK(grp);
927 	rw_enter(&grp->lg_lock, RW_WRITER);
928 	grp->lg_closing = B_TRUE;
929 
930 	/*
931 	 * Unregister from the MAC service module. Since this can
932 	 * fail if a client hasn't closed the MAC port, we gracefully
933 	 * fail the operation.
934 	 */
935 	if (mac_unregister(&grp->lg_mac)) {
936 		rw_exit(&grp->lg_lock);
937 		AGGR_LACP_UNLOCK(grp);
938 		ght_unlock(aggr_grp_hash);
939 		return (EBUSY);
940 	}
941 
942 	/* detach and free MAC ports associated with group */
943 	port = grp->lg_ports;
944 	while (port != NULL) {
945 		cport = port->lp_next;
946 		rw_enter(&port->lp_lock, RW_WRITER);
947 		if (grp->lg_started)
948 			aggr_port_stop(port);
949 		(void) aggr_grp_detach_port(grp, port);
950 		rw_exit(&port->lp_lock);
951 		aggr_port_delete(port);
952 		port = cport;
953 	}
954 
955 	rw_exit(&grp->lg_lock);
956 	AGGR_LACP_UNLOCK(grp);
957 
958 	ght_remove(hte);
959 	ght_free(hte);
960 
961 	ght_unlock(aggr_grp_hash);
962 	AGGR_GRP_REFRELE(grp);
963 
964 	return (0);
965 }
966 
967 void
968 aggr_grp_free(aggr_grp_t *grp)
969 {
970 	ASSERT(grp->lg_refs == 0);
971 	kmem_cache_free(aggr_grp_cache, grp);
972 }
973 
974 /*
975  * Walker invoked when building the list of configured groups and
976  * their ports that must be passed up to user-space.
977  */
978 
979 static boolean_t
980 aggr_grp_info_walker(void *arg, ghte_t hte)
981 {
982 	aggr_grp_t *grp;
983 	aggr_port_t *port;
984 	aggr_grp_info_state_t *state = arg;
985 
986 	if (state->ls_rc != 0)
987 		return (B_FALSE);	/* terminate walk */
988 
989 	grp = (aggr_grp_t *)GHT_VAL(hte);
990 
991 	rw_enter(&grp->lg_lock, RW_READER);
992 
993 	if (state->ls_group_key != 0 && grp->lg_key != state->ls_group_key)
994 		goto bail;
995 
996 	state->ls_group_found = B_TRUE;
997 
998 	state->ls_rc = state->ls_new_grp_fn(state->ls_fn_arg, grp->lg_key,
999 	    grp->lg_addr, grp->lg_addr_fixed, grp->lg_tx_policy,
1000 	    grp->lg_nports, grp->lg_lacp_mode, grp->aggr.PeriodicTimer);
1001 
1002 	if (state->ls_rc != 0)
1003 		goto bail;
1004 
1005 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1006 
1007 		rw_enter(&port->lp_lock, RW_READER);
1008 
1009 		state->ls_rc = state->ls_new_port_fn(state->ls_fn_arg,
1010 		    port->lp_devname, port->lp_port, port->lp_addr,
1011 		    port->lp_state, &port->lp_lacp.ActorOperPortState);
1012 
1013 		rw_exit(&port->lp_lock);
1014 
1015 		if (state->ls_rc != 0)
1016 			goto bail;
1017 	}
1018 
1019 bail:
1020 	rw_exit(&grp->lg_lock);
1021 	return (state->ls_rc == 0);
1022 }
1023 
1024 int
1025 aggr_grp_info(uint_t *ngroups, uint32_t group_key, void *fn_arg,
1026     aggr_grp_info_new_grp_fn_t new_grp_fn,
1027     aggr_grp_info_new_port_fn_t new_port_fn)
1028 {
1029 	aggr_grp_info_state_t state;
1030 	int rc = 0;
1031 
1032 	ght_lock(aggr_grp_hash, GHT_READ);
1033 
1034 	*ngroups = ght_count(aggr_grp_hash);
1035 
1036 	bzero(&state, sizeof (state));
1037 	state.ls_group_key = group_key;
1038 	state.ls_new_grp_fn = new_grp_fn;
1039 	state.ls_new_port_fn = new_port_fn;
1040 	state.ls_fn_arg = fn_arg;
1041 
1042 	ght_walk(aggr_grp_hash, aggr_grp_info_walker, &state);
1043 
1044 	if ((rc = state.ls_rc) == 0 && group_key != 0 &&
1045 	    !state.ls_group_found)
1046 		rc = ENOENT;
1047 
1048 	ght_unlock(aggr_grp_hash);
1049 	return (rc);
1050 }
1051 
/*
 * Aggregation group walker.
 *
 * State set up by aggr_grp_walk() and read back by aggr_grp_walker().
 */

typedef struct aggr_grp_walker_state_s {
	aggr_grp_walker_fn_t ws_walker_fn;	/* function to call per group */
	void		*ws_arg;	/* second argument of ws_walker_fn */
} aggr_grp_walker_state_t;
1060 
1061 void
1062 aggr_grp_walker(void *arg, ghte_t hte)
1063 {
1064 	aggr_grp_walker_state_t *state = arg;
1065 	aggr_grp_t *grp;
1066 
1067 	grp = (aggr_grp_t *)GHT_VAL(hte);
1068 	state->ws_walker_fn(grp, state->ws_arg);
1069 }
1070 
1071 void
1072 aggr_grp_walk(aggr_grp_walker_fn_t walker, void *arg)
1073 {
1074 	aggr_grp_walker_state_t state;
1075 
1076 	state.ws_walker_fn = walker;
1077 	state.ws_arg = arg;
1078 
1079 	ght_lock(aggr_grp_hash, GHT_READ);
1080 	ght_walk(aggr_grp_hash, aggr_grp_info_walker, &state);
1081 	ght_unlock(aggr_grp_hash);
1082 }
1083 
1084 static void
1085 aggr_m_resources(void *arg)
1086 {
1087 	aggr_grp_t *grp = arg;
1088 	aggr_port_t *port;
1089 
1090 	/* Call each port's m_resources function */
1091 	for (port = grp->lg_ports; port != NULL; port = port->lp_next)
1092 		mac_resources(port->lp_mh);
1093 }
1094 
1095 /*ARGSUSED*/
1096 static void
1097 aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
1098 {
1099 	miocnak(q, mp, 0, ENOTSUP);
1100 }
1101 
1102 static uint64_t
1103 aggr_m_stat(void *arg, enum mac_stat stat)
1104 {
1105 	aggr_grp_t *grp = arg;
1106 	aggr_port_t *port;
1107 	uint64_t val;
1108 
1109 	rw_enter(&grp->lg_lock, RW_READER);
1110 
1111 	switch (stat) {
1112 	case MAC_STAT_IFSPEED:
1113 		val = grp->lg_ifspeed;
1114 		break;
1115 	case MAC_STAT_LINK_DUPLEX:
1116 		val = grp->lg_link_duplex;
1117 		break;
1118 	default:
1119 		/*
1120 		 * The remaining statistics are counters. They are computed
1121 		 * by aggregating the counters of the members MACs while they
1122 		 * were aggregated, plus the residual counter of the group
1123 		 * itself, which is updated each time a MAC is removed from
1124 		 * the group.
1125 		 */
1126 		val = 0;
1127 		for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1128 			/* actual port statistic */
1129 			val += aggr_port_stat(port, stat);
1130 			/* minus the port stat when it was added */
1131 			val -= port->lp_stat[stat];
1132 			/* plus any residual amount for the group */
1133 			val += grp->lg_stat[stat];
1134 		}
1135 	}
1136 
1137 	rw_exit(&grp->lg_lock);
1138 	return (val);
1139 }
1140 
1141 static int
1142 aggr_m_start(void *arg)
1143 {
1144 	aggr_grp_t *grp = arg;
1145 	aggr_port_t *port;
1146 
1147 	AGGR_LACP_LOCK(grp);
1148 	rw_enter(&grp->lg_lock, RW_WRITER);
1149 
1150 	/*
1151 	 * Attempts to start all configured members of the group.
1152 	 * Group members will be attached when their link-up notification
1153 	 * is received.
1154 	 */
1155 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1156 		rw_enter(&port->lp_lock, RW_WRITER);
1157 		if (aggr_port_start(port) != 0) {
1158 			rw_exit(&port->lp_lock);
1159 			continue;
1160 		}
1161 
1162 		/* set port promiscuous mode */
1163 		if (aggr_port_promisc(port, grp->lg_promisc) != 0)
1164 			aggr_port_stop(port);
1165 		rw_exit(&port->lp_lock);
1166 	}
1167 
1168 	grp->lg_started = B_TRUE;
1169 
1170 	rw_exit(&grp->lg_lock);
1171 	AGGR_LACP_UNLOCK(grp);
1172 
1173 	return (0);
1174 }
1175 
1176 static void
1177 aggr_m_stop(void *arg)
1178 {
1179 	aggr_grp_t *grp = arg;
1180 	aggr_port_t *port;
1181 
1182 	rw_enter(&grp->lg_lock, RW_WRITER);
1183 
1184 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1185 		rw_enter(&port->lp_lock, RW_WRITER);
1186 		aggr_port_stop(port);
1187 		rw_exit(&port->lp_lock);
1188 	}
1189 
1190 	grp->lg_started = B_FALSE;
1191 
1192 	rw_exit(&grp->lg_lock);
1193 }
1194 
1195 static int
1196 aggr_m_promisc(void *arg, boolean_t on)
1197 {
1198 	aggr_grp_t *grp = arg;
1199 	aggr_port_t *port;
1200 
1201 	rw_enter(&grp->lg_lock, RW_WRITER);
1202 	AGGR_GRP_REFHOLD(grp);
1203 
1204 	if (on == grp->lg_promisc)
1205 		goto bail;
1206 
1207 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1208 		rw_enter(&port->lp_lock, RW_WRITER);
1209 		AGGR_PORT_REFHOLD(port);
1210 		if (port->lp_started) {
1211 			if (aggr_port_promisc(port, on) != 0)
1212 				(void) aggr_grp_detach_port(grp, port);
1213 		}
1214 		rw_exit(&port->lp_lock);
1215 		AGGR_PORT_REFRELE(port);
1216 		if (grp->lg_closing)
1217 			break;
1218 	}
1219 
1220 	grp->lg_promisc = on;
1221 
1222 bail:
1223 	rw_exit(&grp->lg_lock);
1224 	AGGR_GRP_REFRELE(grp);
1225 
1226 	return (0);
1227 }
1228 
1229 /*
1230  * Add or remove the multicast addresses that are defined for the group
1231  * to or from the specified port.
1232  * This function is called before stopping a port, before a port
1233  * is detached from a group, and when attaching a port to a group.
1234  */
1235 void
1236 aggr_grp_multicst_port(aggr_port_t *port, boolean_t add)
1237 {
1238 	aggr_grp_t *grp = port->lp_grp;
1239 
1240 	ASSERT(RW_WRITE_HELD(&port->lp_lock));
1241 	ASSERT(RW_WRITE_HELD(&grp->lg_lock) || RW_READ_HELD(&grp->lg_lock));
1242 
1243 	if (!port->lp_started)
1244 		return;
1245 
1246 	mac_multicst_refresh(&grp->lg_mac, aggr_port_multicst, port,
1247 	    add);
1248 }
1249 
1250 static int
1251 aggr_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
1252 {
1253 	aggr_grp_t *grp = arg;
1254 	aggr_port_t *port = NULL;
1255 	int err = 0, cerr;
1256 
1257 	rw_enter(&grp->lg_lock, RW_WRITER);
1258 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1259 		if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
1260 			continue;
1261 		cerr = aggr_port_multicst(port, add, addrp);
1262 		if (cerr != 0 && err == 0)
1263 			err = cerr;
1264 	}
1265 	rw_exit(&grp->lg_lock);
1266 	return (err);
1267 }
1268 
1269 static int
1270 aggr_m_unicst(void *arg, const uint8_t *macaddr)
1271 {
1272 	aggr_grp_t *grp = arg;
1273 	int rc;
1274 
1275 	AGGR_LACP_LOCK(grp);
1276 	rw_enter(&grp->lg_lock, RW_WRITER);
1277 	rc = aggr_grp_modify(0, grp, AGGR_MODIFY_MAC, 0, B_TRUE, macaddr,
1278 	    0, 0);
1279 	rw_exit(&grp->lg_lock);
1280 	AGGR_LACP_UNLOCK(grp);
1281 
1282 	return (rc);
1283 }
1284 
1285 /*
1286  * Initialize the capabilities that are advertised for the group
1287  * according to the capabilities of the constituent ports.
1288  */
1289 static void
1290 aggr_grp_capab_set(aggr_grp_t *grp)
1291 {
1292 	uint32_t cksum = (uint32_t)-1;
1293 	uint32_t poll = DL_CAPAB_POLL;
1294 	aggr_port_t *port;
1295 	const mac_info_t *port_mi;
1296 
1297 	ASSERT(RW_WRITE_HELD(&grp->lg_lock));
1298 
1299 	ASSERT(grp->lg_ports != NULL);
1300 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1301 		port_mi = mac_info(port->lp_mh);
1302 		cksum &= port_mi->mi_cksum;
1303 		poll &= port_mi->mi_poll;
1304 	}
1305 
1306 	grp->lg_mac.m_info.mi_cksum = cksum;
1307 	grp->lg_mac.m_info.mi_poll = poll;
1308 }
1309 
1310 /*
1311  * Checks whether the capabilities of the ports being added are compatible
1312  * with the current capabilities of the aggregation.
1313  */
1314 static boolean_t
1315 aggr_grp_capab_check(aggr_grp_t *grp, aggr_port_t *port)
1316 {
1317 	const mac_info_t *port_mi = mac_info(port->lp_mh);
1318 	uint32_t grp_cksum = grp->lg_mac.m_info.mi_cksum;
1319 
1320 	ASSERT(grp->lg_ports != NULL);
1321 
1322 	return (((grp_cksum & port_mi->mi_cksum) == grp_cksum) &&
1323 	    (grp->lg_mac.m_info.mi_poll == port_mi->mi_poll));
1324 }
1325