xref: /titanic_41/usr/src/uts/common/io/aggr/aggr_grp.c (revision e9af4bc0b1cc30cea75d6ad4aa2fde97d985e9be)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups.
28  *
29  * An instance of the structure aggr_grp_t is allocated for each
30  * link aggregation group. When created, aggr_grp_t objects are
31  * entered into the aggr_grp_hash hash table maintained by the modhash
32  * module. The hash key is the linkid associated with the link
33  * aggregation group.
34  *
 * A set of MAC ports is associated with each link aggregation group.
36  */
37 
38 #include <sys/types.h>
39 #include <sys/sysmacros.h>
40 #include <sys/conf.h>
41 #include <sys/cmn_err.h>
42 #include <sys/disp.h>
43 #include <sys/list.h>
44 #include <sys/ksynch.h>
45 #include <sys/kmem.h>
46 #include <sys/stream.h>
47 #include <sys/modctl.h>
48 #include <sys/ddi.h>
49 #include <sys/sunddi.h>
50 #include <sys/atomic.h>
51 #include <sys/stat.h>
52 #include <sys/modhash.h>
53 #include <sys/id_space.h>
54 #include <sys/strsun.h>
55 #include <sys/cred.h>
56 #include <sys/dlpi.h>
57 #include <sys/zone.h>
58 #include <sys/mac_provider.h>
59 #include <sys/dls.h>
60 #include <sys/vlan.h>
61 #include <sys/aggr.h>
62 #include <sys/aggr_impl.h>
63 
64 static int aggr_m_start(void *);
65 static void aggr_m_stop(void *);
66 static int aggr_m_promisc(void *, boolean_t);
67 static int aggr_m_multicst(void *, boolean_t, const uint8_t *);
68 static int aggr_m_unicst(void *, const uint8_t *);
69 static int aggr_m_stat(void *, uint_t, uint64_t *);
70 static void aggr_m_ioctl(void *, queue_t *, mblk_t *);
71 static boolean_t aggr_m_capab_get(void *, mac_capab_t, void *);
72 static int aggr_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
73     const void *);
74 static int aggr_m_getprop(void *, const char *, mac_prop_id_t, uint_t,
75     uint_t, void *, uint_t *);
76 
77 
78 static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, datalink_id_t);
79 static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *,
80     boolean_t *);
81 
82 static void aggr_grp_capab_set(aggr_grp_t *);
83 static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *);
84 static uint_t aggr_grp_max_sdu(aggr_grp_t *);
85 static uint32_t aggr_grp_max_margin(aggr_grp_t *);
86 static boolean_t aggr_grp_sdu_check(aggr_grp_t *, aggr_port_t *);
87 static boolean_t aggr_grp_margin_check(aggr_grp_t *, aggr_port_t *);
88 
89 static int aggr_add_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
90 static void aggr_rem_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *);
91 static int aggr_pseudo_disable_intr(mac_intr_handle_t);
92 static int aggr_pseudo_enable_intr(mac_intr_handle_t);
93 static int aggr_pseudo_start_ring(mac_ring_driver_t, uint64_t);
94 static void aggr_pseudo_stop_ring(mac_ring_driver_t);
95 static int aggr_addmac(void *, const uint8_t *);
96 static int aggr_remmac(void *, const uint8_t *);
97 static mblk_t *aggr_rx_poll(void *, int);
98 static void aggr_fill_ring(void *, mac_ring_type_t, const int,
99     const int, mac_ring_info_t *, mac_ring_handle_t);
100 static void aggr_fill_group(void *, mac_ring_type_t, const int,
101     mac_group_info_t *, mac_group_handle_t);
102 
103 static kmem_cache_t	*aggr_grp_cache;
104 static mod_hash_t	*aggr_grp_hash;
105 static krwlock_t	aggr_grp_lock;
106 static uint_t		aggr_grp_cnt;
107 static id_space_t	*key_ids;
108 
109 #define	GRP_HASHSZ		64
110 #define	GRP_HASH_KEY(linkid)	((mod_hash_key_t)(uintptr_t)linkid)
111 #define	AGGR_PORT_NAME_DELIMIT '-'
112 
113 static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0};
114 
115 #define	AGGR_M_CALLBACK_FLAGS	\
116 	(MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP)
117 
118 static mac_callbacks_t aggr_m_callbacks = {
119 	AGGR_M_CALLBACK_FLAGS,
120 	aggr_m_stat,
121 	aggr_m_start,
122 	aggr_m_stop,
123 	aggr_m_promisc,
124 	aggr_m_multicst,
125 	NULL,
126 	aggr_m_tx,
127 	aggr_m_ioctl,
128 	aggr_m_capab_get,
129 	NULL,
130 	NULL,
131 	aggr_m_setprop,
132 	aggr_m_getprop
133 };
134 
135 /*ARGSUSED*/
136 static int
137 aggr_grp_constructor(void *buf, void *arg, int kmflag)
138 {
139 	aggr_grp_t *grp = buf;
140 
141 	bzero(grp, sizeof (*grp));
142 	mutex_init(&grp->lg_lacp_lock, NULL, MUTEX_DEFAULT, NULL);
143 	cv_init(&grp->lg_lacp_cv, NULL, CV_DEFAULT, NULL);
144 	rw_init(&grp->lg_tx_lock, NULL, RW_DRIVER, NULL);
145 	mutex_init(&grp->lg_port_lock, NULL, MUTEX_DEFAULT, NULL);
146 	cv_init(&grp->lg_port_cv, NULL, CV_DEFAULT, NULL);
147 	grp->lg_link_state = LINK_STATE_UNKNOWN;
148 	return (0);
149 }
150 
151 /*ARGSUSED*/
152 static void
153 aggr_grp_destructor(void *buf, void *arg)
154 {
155 	aggr_grp_t *grp = buf;
156 
157 	if (grp->lg_tx_ports != NULL) {
158 		kmem_free(grp->lg_tx_ports,
159 		    grp->lg_tx_ports_size * sizeof (aggr_port_t *));
160 	}
161 
162 	mutex_destroy(&grp->lg_lacp_lock);
163 	cv_destroy(&grp->lg_lacp_cv);
164 	mutex_destroy(&grp->lg_port_lock);
165 	cv_destroy(&grp->lg_port_cv);
166 	rw_destroy(&grp->lg_tx_lock);
167 }
168 
169 void
170 aggr_grp_init(void)
171 {
172 	aggr_grp_cache = kmem_cache_create("aggr_grp_cache",
173 	    sizeof (aggr_grp_t), 0, aggr_grp_constructor,
174 	    aggr_grp_destructor, NULL, NULL, NULL, 0);
175 
176 	aggr_grp_hash = mod_hash_create_idhash("aggr_grp_hash",
177 	    GRP_HASHSZ, mod_hash_null_valdtor);
178 	rw_init(&aggr_grp_lock, NULL, RW_DEFAULT, NULL);
179 	aggr_grp_cnt = 0;
180 
181 	/*
182 	 * Allocate an id space to manage key values (when key is not
183 	 * specified). The range of the id space will be from
184 	 * (AGGR_MAX_KEY + 1) to UINT16_MAX, because the LACP protocol
185 	 * uses a 16-bit key.
186 	 */
187 	key_ids = id_space_create("aggr_key_ids", AGGR_MAX_KEY + 1, UINT16_MAX);
188 	ASSERT(key_ids != NULL);
189 }
190 
191 void
192 aggr_grp_fini(void)
193 {
194 	id_space_destroy(key_ids);
195 	rw_destroy(&aggr_grp_lock);
196 	mod_hash_destroy_idhash(aggr_grp_hash);
197 	kmem_cache_destroy(aggr_grp_cache);
198 }
199 
200 uint_t
201 aggr_grp_count(void)
202 {
203 	uint_t	count;
204 
205 	rw_enter(&aggr_grp_lock, RW_READER);
206 	count = aggr_grp_cnt;
207 	rw_exit(&aggr_grp_lock);
208 	return (count);
209 }
210 
211 /*
212  * Since both aggr_port_notify_cb() and aggr_port_timer_thread() functions
213  * requires the mac perimeter, this function holds a reference of the aggr
214  * and aggr won't call mac_unregister() until this reference drops to 0.
215  */
216 void
217 aggr_grp_port_hold(aggr_port_t *port)
218 {
219 	aggr_grp_t	*grp = port->lp_grp;
220 
221 	AGGR_PORT_REFHOLD(port);
222 	mutex_enter(&grp->lg_port_lock);
223 	grp->lg_port_ref++;
224 	mutex_exit(&grp->lg_port_lock);
225 }
226 
227 /*
228  * Release the reference of the grp and inform aggr_grp_delete() calling
229  * mac_unregister() is now safe.
230  */
231 void
232 aggr_grp_port_rele(aggr_port_t *port)
233 {
234 	aggr_grp_t	*grp = port->lp_grp;
235 
236 	mutex_enter(&grp->lg_port_lock);
237 	if (--grp->lg_port_ref == 0)
238 		cv_signal(&grp->lg_port_cv);
239 	mutex_exit(&grp->lg_port_lock);
240 	AGGR_PORT_REFRELE(port);
241 }
242 
243 /*
244  * Wait for the port's lacp timer thread and the port's notification callback
245  * to exit.
246  */
247 void
248 aggr_grp_port_wait(aggr_grp_t *grp)
249 {
250 	mutex_enter(&grp->lg_port_lock);
251 	if (grp->lg_port_ref != 0)
252 		cv_wait(&grp->lg_port_cv, &grp->lg_port_lock);
253 	mutex_exit(&grp->lg_port_lock);
254 }
255 
256 /*
257  * Attach a port to a link aggregation group.
258  *
259  * A port is attached to a link aggregation group once its speed
260  * and link state have been verified.
261  *
262  * Returns B_TRUE if the group link state or speed has changed. If
263  * it's the case, the caller must notify the MAC layer via a call
264  * to mac_link().
265  */
266 boolean_t
267 aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port)
268 {
269 	boolean_t link_state_changed = B_FALSE;
270 
271 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
272 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
273 
274 	if (port->lp_state == AGGR_PORT_STATE_ATTACHED)
275 		return (B_FALSE);
276 
277 	/*
278 	 * Validate the MAC port link speed and update the group
279 	 * link speed if needed.
280 	 */
281 	if (port->lp_ifspeed == 0 ||
282 	    port->lp_link_state != LINK_STATE_UP ||
283 	    port->lp_link_duplex != LINK_DUPLEX_FULL) {
284 		/*
285 		 * Can't attach a MAC port with unknown link speed,
286 		 * down link, or not in full duplex mode.
287 		 */
288 		return (B_FALSE);
289 	}
290 
291 	if (grp->lg_ifspeed == 0) {
292 		/*
293 		 * The group inherits the speed of the first link being
294 		 * attached.
295 		 */
296 		grp->lg_ifspeed = port->lp_ifspeed;
297 		link_state_changed = B_TRUE;
298 	} else if (grp->lg_ifspeed != port->lp_ifspeed) {
299 		/*
300 		 * The link speed of the MAC port must be the same as
301 		 * the group link speed, as per 802.3ad. Since it is
302 		 * not, the attach is cancelled.
303 		 */
304 		return (B_FALSE);
305 	}
306 
307 	grp->lg_nattached_ports++;
308 
309 	/*
310 	 * Update the group link state.
311 	 */
312 	if (grp->lg_link_state != LINK_STATE_UP) {
313 		grp->lg_link_state = LINK_STATE_UP;
314 		grp->lg_link_duplex = LINK_DUPLEX_FULL;
315 		link_state_changed = B_TRUE;
316 	}
317 
318 	/*
319 	 * Update port's state.
320 	 */
321 	port->lp_state = AGGR_PORT_STATE_ATTACHED;
322 
323 	aggr_grp_multicst_port(port, B_TRUE);
324 
325 	/*
326 	 * Set port's receive callback
327 	 */
328 	mac_rx_set(port->lp_mch, aggr_recv_cb, port);
329 
330 	/*
331 	 * If LACP is OFF, the port can be used to send data as soon
332 	 * as its link is up and verified to be compatible with the
333 	 * aggregation.
334 	 *
335 	 * If LACP is active or passive, notify the LACP subsystem, which
336 	 * will enable sending on the port following the LACP protocol.
337 	 */
338 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
339 		aggr_send_port_enable(port);
340 	else
341 		aggr_lacp_port_attached(port);
342 
343 	return (link_state_changed);
344 }
345 
346 boolean_t
347 aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port)
348 {
349 	boolean_t link_state_changed = B_FALSE;
350 
351 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
352 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
353 
354 	/* update state */
355 	if (port->lp_state != AGGR_PORT_STATE_ATTACHED)
356 		return (B_FALSE);
357 
358 	mac_rx_clear(port->lp_mch);
359 
360 	aggr_grp_multicst_port(port, B_FALSE);
361 
362 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
363 		aggr_send_port_disable(port);
364 	else
365 		aggr_lacp_port_detached(port);
366 
367 	port->lp_state = AGGR_PORT_STATE_STANDBY;
368 
369 	grp->lg_nattached_ports--;
370 	if (grp->lg_nattached_ports == 0) {
371 		/* the last attached MAC port of the group is being detached */
372 		grp->lg_ifspeed = 0;
373 		grp->lg_link_state = LINK_STATE_DOWN;
374 		grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
375 		link_state_changed = B_TRUE;
376 	}
377 
378 	return (link_state_changed);
379 }
380 
381 /*
382  * Update the MAC addresses of the constituent ports of the specified
383  * group. This function is invoked:
384  * - after creating a new aggregation group.
385  * - after adding new ports to an aggregation group.
386  * - after removing a port from a group when the MAC address of
387  *   that port was used for the MAC address of the group.
388  * - after the MAC address of a port changed when the MAC address
389  *   of that port was used for the MAC address of the group.
390  *
391  * Return true if the link state of the aggregation changed, for example
392  * as a result of a failure changing the MAC address of one of the
393  * constituent ports.
394  */
395 boolean_t
396 aggr_grp_update_ports_mac(aggr_grp_t *grp)
397 {
398 	aggr_port_t *cport;
399 	boolean_t link_state_changed = B_FALSE;
400 	mac_perim_handle_t mph;
401 
402 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
403 
404 	for (cport = grp->lg_ports; cport != NULL;
405 	    cport = cport->lp_next) {
406 		mac_perim_enter_by_mh(cport->lp_mh, &mph);
407 		if (aggr_port_unicst(cport) != 0) {
408 			if (aggr_grp_detach_port(grp, cport))
409 				link_state_changed = B_TRUE;
410 		} else {
411 			/*
412 			 * If a port was detached because of a previous
413 			 * failure changing the MAC address, the port is
414 			 * reattached when it successfully changes the MAC
415 			 * address now, and this might cause the link state
416 			 * of the aggregation to change.
417 			 */
418 			if (aggr_grp_attach_port(grp, cport))
419 				link_state_changed = B_TRUE;
420 		}
421 		mac_perim_exit(mph);
422 	}
423 	return (link_state_changed);
424 }
425 
426 /*
427  * Invoked when the MAC address of a port has changed. If the port's
428  * MAC address was used for the group MAC address, set mac_addr_changedp
429  * to B_TRUE to indicate to the caller that it should send a MAC_NOTE_UNICST
430  * notification. If the link state changes due to detach/attach of
431  * the constituent port, set link_state_changedp to B_TRUE to indicate
432  * to the caller that it should send a MAC_NOTE_LINK notification. In both
433  * cases, it is the responsibility of the caller to invoke notification
434  * functions after releasing the the port lock.
435  */
436 void
437 aggr_grp_port_mac_changed(aggr_grp_t *grp, aggr_port_t *port,
438     boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
439 {
440 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
441 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
442 	ASSERT(mac_addr_changedp != NULL);
443 	ASSERT(link_state_changedp != NULL);
444 
445 	*mac_addr_changedp = B_FALSE;
446 	*link_state_changedp = B_FALSE;
447 
448 	if (grp->lg_addr_fixed) {
449 		/*
450 		 * The group is using a fixed MAC address or an automatic
451 		 * MAC address has not been set.
452 		 */
453 		return;
454 	}
455 
456 	if (grp->lg_mac_addr_port == port) {
457 		/*
458 		 * The MAC address of the port was assigned to the group
459 		 * MAC address. Update the group MAC address.
460 		 */
461 		bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
462 		*mac_addr_changedp = B_TRUE;
463 	} else {
464 		/*
465 		 * Update the actual port MAC address to the MAC address
466 		 * of the group.
467 		 */
468 		if (aggr_port_unicst(port) != 0) {
469 			*link_state_changedp = aggr_grp_detach_port(grp, port);
470 		} else {
471 			/*
472 			 * If a port was detached because of a previous
473 			 * failure changing the MAC address, the port is
474 			 * reattached when it successfully changes the MAC
475 			 * address now, and this might cause the link state
476 			 * of the aggregation to change.
477 			 */
478 			*link_state_changedp = aggr_grp_attach_port(grp, port);
479 		}
480 	}
481 }
482 
483 /*
484  * Add a port to a link aggregation group.
485  */
486 static int
487 aggr_grp_add_port(aggr_grp_t *grp, datalink_id_t port_linkid, boolean_t force,
488     aggr_port_t **pp)
489 {
490 	aggr_port_t *port, **cport;
491 	mac_perim_handle_t mph;
492 	zoneid_t port_zoneid = ALL_ZONES;
493 	int err;
494 
495 	/* The port must be int the same zone as the aggregation. */
496 	if (zone_check_datalink(&port_zoneid, port_linkid) != 0)
497 		port_zoneid = GLOBAL_ZONEID;
498 	if (grp->lg_zoneid != port_zoneid)
499 		return (EBUSY);
500 
501 	/*
502 	 * lg_mh could be NULL when the function is called during the creation
503 	 * of the aggregation.
504 	 */
505 	ASSERT(grp->lg_mh == NULL || MAC_PERIM_HELD(grp->lg_mh));
506 
507 	/* create new port */
508 	err = aggr_port_create(grp, port_linkid, force, &port);
509 	if (err != 0)
510 		return (err);
511 
512 	mac_perim_enter_by_mh(port->lp_mh, &mph);
513 
514 	/* add port to list of group constituent ports */
515 	cport = &grp->lg_ports;
516 	while (*cport != NULL)
517 		cport = &((*cport)->lp_next);
518 	*cport = port;
519 
520 	/*
521 	 * Back reference to the group it is member of. A port always
522 	 * holds a reference to its group to ensure that the back
523 	 * reference is always valid.
524 	 */
525 	port->lp_grp = grp;
526 	AGGR_GRP_REFHOLD(grp);
527 	grp->lg_nports++;
528 
529 	aggr_lacp_init_port(port);
530 	mac_perim_exit(mph);
531 
532 	if (pp != NULL)
533 		*pp = port;
534 
535 	return (0);
536 }
537 
538 /*
539  * Add a pseudo Rx ring for the given HW ring handle.
540  */
541 static int
542 aggr_add_pseudo_rx_ring(aggr_port_t *port,
543     aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh)
544 {
545 	aggr_pseudo_rx_ring_t	*ring;
546 	int			err;
547 	int			j;
548 
549 	for (j = 0; j < MAX_RINGS_PER_GROUP; j++) {
550 		ring = rx_grp->arg_rings + j;
551 		if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE))
552 			break;
553 	}
554 
555 	/*
556 	 * No slot for this new Rx ring.
557 	 */
558 	if (j == MAX_RINGS_PER_GROUP)
559 		return (EIO);
560 
561 	ring->arr_flags |= MAC_PSEUDO_RING_INUSE;
562 	ring->arr_hw_rh = hw_rh;
563 	ring->arr_port = port;
564 	rx_grp->arg_ring_cnt++;
565 
566 	/*
567 	 * The group is already registered, dynamically add a new ring to the
568 	 * mac group.
569 	 */
570 	mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring);
571 	if ((err = mac_group_add_ring(rx_grp->arg_gh, j)) != 0) {
572 		ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE;
573 		ring->arr_hw_rh = NULL;
574 		ring->arr_port = NULL;
575 		rx_grp->arg_ring_cnt--;
576 		mac_hwring_teardown(hw_rh);
577 	}
578 	return (err);
579 }
580 
581 /*
582  * Remove the pseudo Rx ring of the given HW ring handle.
583  */
584 static void
585 aggr_rem_pseudo_rx_ring(aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh)
586 {
587 	aggr_pseudo_rx_ring_t	*ring;
588 	int			j;
589 
590 	for (j = 0; j < MAX_RINGS_PER_GROUP; j++) {
591 		ring = rx_grp->arg_rings + j;
592 		if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE) ||
593 		    ring->arr_hw_rh != hw_rh) {
594 			continue;
595 		}
596 
597 		mac_group_rem_ring(rx_grp->arg_gh, ring->arr_rh);
598 
599 		ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE;
600 		ring->arr_hw_rh = NULL;
601 		ring->arr_port = NULL;
602 		rx_grp->arg_ring_cnt--;
603 		mac_hwring_teardown(hw_rh);
604 		break;
605 	}
606 }
607 
608 /*
609  * This function is called to create pseudo rings over the hardware rings of
610  * the underlying device. Note that there is a 1:1 mapping between the pseudo
611  * RX rings of the aggr and the hardware rings of the underlying port.
612  */
613 static int
614 aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
615 {
616 	aggr_grp_t		*grp = port->lp_grp;
617 	mac_ring_handle_t	hw_rh[MAX_RINGS_PER_GROUP];
618 	aggr_unicst_addr_t	*addr, *a;
619 	mac_perim_handle_t	pmph;
620 	int			hw_rh_cnt, i = 0, j;
621 	int			err = 0;
622 
623 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
624 	mac_perim_enter_by_mh(port->lp_mh, &pmph);
625 
626 	/*
627 	 * This function must be called after the aggr registers its mac
628 	 * and its RX group has been initialized.
629 	 */
630 	ASSERT(rx_grp->arg_gh != NULL);
631 
632 	/*
633 	 * Get the list the the underlying HW rings.
634 	 */
635 	hw_rh_cnt = mac_hwrings_get(port->lp_mch, &port->lp_hwgh, hw_rh,
636 	    MAC_RING_TYPE_RX);
637 
638 	if (port->lp_hwgh != NULL) {
639 		/*
640 		 * Quiesce the HW ring and the mac srs on the ring. Note
641 		 * that the HW ring will be restarted when the pseudo ring
642 		 * is started. At that time all the packets will be
643 		 * directly passed up to the pseudo RX ring and handled
644 		 * by mac srs created over the pseudo RX ring.
645 		 */
646 		mac_rx_client_quiesce(port->lp_mch);
647 		mac_srs_perm_quiesce(port->lp_mch, B_TRUE);
648 	}
649 
650 	/*
651 	 * Add all the unicast addresses to the newly added port.
652 	 */
653 	for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) {
654 		if ((err = aggr_port_addmac(port, addr->aua_addr)) != 0)
655 			break;
656 	}
657 
658 	for (i = 0; err == 0 && i < hw_rh_cnt; i++)
659 		err = aggr_add_pseudo_rx_ring(port, rx_grp, hw_rh[i]);
660 
661 	if (err != 0) {
662 		for (j = 0; j < i; j++)
663 			aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[j]);
664 
665 		for (a = rx_grp->arg_macaddr; a != addr; a = a->aua_next)
666 			aggr_port_remmac(port, a->aua_addr);
667 
668 		if (port->lp_hwgh != NULL) {
669 			mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
670 			mac_rx_client_restart(port->lp_mch);
671 			port->lp_hwgh = NULL;
672 		}
673 	} else {
674 		port->lp_grp_added = B_TRUE;
675 	}
676 done:
677 	mac_perim_exit(pmph);
678 	return (err);
679 }
680 
681 /*
682  * This function is called by aggr to remove pseudo RX rings over the
683  * HW rings of the underlying port.
684  */
685 static void
686 aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp)
687 {
688 	aggr_grp_t		*grp = port->lp_grp;
689 	mac_ring_handle_t	hw_rh[MAX_RINGS_PER_GROUP];
690 	aggr_unicst_addr_t	*addr;
691 	mac_group_handle_t	hwgh;
692 	mac_perim_handle_t	pmph;
693 	int			hw_rh_cnt, i;
694 
695 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
696 	mac_perim_enter_by_mh(port->lp_mh, &pmph);
697 
698 	if (!port->lp_grp_added)
699 		goto done;
700 
701 	ASSERT(rx_grp->arg_gh != NULL);
702 	hw_rh_cnt = mac_hwrings_get(port->lp_mch, &hwgh, hw_rh,
703 	    MAC_RING_TYPE_RX);
704 
705 	/*
706 	 * If hw_rh_cnt is 0, it means that the underlying port does not
707 	 * support RX rings. Directly return in this case.
708 	 */
709 	for (i = 0; i < hw_rh_cnt; i++)
710 		aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[i]);
711 
712 	for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next)
713 		aggr_port_remmac(port, addr->aua_addr);
714 
715 	if (port->lp_hwgh != NULL) {
716 		port->lp_hwgh = NULL;
717 
718 		/*
719 		 * First clear the permanent-quiesced flag of the RX srs then
720 		 * restart the HW ring and the mac srs on the ring. Note that
721 		 * the HW ring and associated SRS will soon been removed when
722 		 * the port is removed from the aggr.
723 		 */
724 		mac_srs_perm_quiesce(port->lp_mch, B_FALSE);
725 		mac_rx_client_restart(port->lp_mch);
726 	}
727 
728 	port->lp_grp_added = B_FALSE;
729 done:
730 	mac_perim_exit(pmph);
731 }
732 
733 static int
734 aggr_pseudo_disable_intr(mac_intr_handle_t ih)
735 {
736 	aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih;
737 	return (mac_hwring_disable_intr(rr_ring->arr_hw_rh));
738 }
739 
740 static int
741 aggr_pseudo_enable_intr(mac_intr_handle_t ih)
742 {
743 	aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih;
744 	return (mac_hwring_enable_intr(rr_ring->arr_hw_rh));
745 }
746 
747 static int
748 aggr_pseudo_start_ring(mac_ring_driver_t arg, uint64_t mr_gen)
749 {
750 	aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)arg;
751 	int err;
752 
753 	err = mac_hwring_start(rr_ring->arr_hw_rh);
754 	if (err == 0)
755 		rr_ring->arr_gen = mr_gen;
756 	return (err);
757 }
758 
759 static void
760 aggr_pseudo_stop_ring(mac_ring_driver_t arg)
761 {
762 	aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)arg;
763 	mac_hwring_stop(rr_ring->arr_hw_rh);
764 }
765 
766 /*
767  * Add one or more ports to an existing link aggregation group.
768  */
769 int
770 aggr_grp_add_ports(datalink_id_t linkid, uint_t nports, boolean_t force,
771     laioc_port_t *ports)
772 {
773 	int rc, i, nadded = 0;
774 	aggr_grp_t *grp = NULL;
775 	aggr_port_t *port;
776 	boolean_t link_state_changed = B_FALSE;
777 	mac_perim_handle_t mph, pmph;
778 
779 	/* get group corresponding to linkid */
780 	rw_enter(&aggr_grp_lock, RW_READER);
781 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
782 	    (mod_hash_val_t *)&grp) != 0) {
783 		rw_exit(&aggr_grp_lock);
784 		return (ENOENT);
785 	}
786 	AGGR_GRP_REFHOLD(grp);
787 
788 	/*
789 	 * Hold the perimeter so that the aggregation won't be destroyed.
790 	 */
791 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
792 	rw_exit(&aggr_grp_lock);
793 
794 	/* add the specified ports to group */
795 	for (i = 0; i < nports; i++) {
796 		/* add port to group */
797 		if ((rc = aggr_grp_add_port(grp, ports[i].lp_linkid,
798 		    force, &port)) != 0) {
799 			goto bail;
800 		}
801 		ASSERT(port != NULL);
802 		nadded++;
803 
804 		/* check capabilities */
805 		if (!aggr_grp_capab_check(grp, port) ||
806 		    !aggr_grp_sdu_check(grp, port) ||
807 		    !aggr_grp_margin_check(grp, port)) {
808 			rc = ENOTSUP;
809 			goto bail;
810 		}
811 
812 		/*
813 		 * Create the pseudo ring for each HW ring of the underlying
814 		 * port.
815 		 */
816 		rc = aggr_add_pseudo_rx_group(port, &grp->lg_rx_group);
817 		if (rc != 0)
818 			goto bail;
819 
820 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
821 
822 		/* set LACP mode */
823 		aggr_port_lacp_set_mode(grp, port);
824 
825 		/* start port if group has already been started */
826 		if (grp->lg_started) {
827 			rc = aggr_port_start(port);
828 			if (rc != 0) {
829 				mac_perim_exit(pmph);
830 				goto bail;
831 			}
832 
833 			/*
834 			 * Turn on the promiscuous mode over the port when it
835 			 * is requested to be turned on to receive the
836 			 * non-primary address over a port, or the promiscous
837 			 * mode is enabled over the aggr.
838 			 */
839 			if (grp->lg_promisc || port->lp_prom_addr != NULL) {
840 				rc = aggr_port_promisc(port, B_TRUE);
841 				if (rc != 0) {
842 					mac_perim_exit(pmph);
843 					goto bail;
844 				}
845 			}
846 		}
847 		mac_perim_exit(pmph);
848 
849 		/*
850 		 * Attach each port if necessary.
851 		 */
852 		if (aggr_port_notify_link(grp, port))
853 			link_state_changed = B_TRUE;
854 
855 		/*
856 		 * Initialize the callback functions for this port.
857 		 */
858 		aggr_port_init_callbacks(port);
859 	}
860 
861 	/* update the MAC address of the constituent ports */
862 	if (aggr_grp_update_ports_mac(grp))
863 		link_state_changed = B_TRUE;
864 
865 	if (link_state_changed)
866 		mac_link_update(grp->lg_mh, grp->lg_link_state);
867 
868 bail:
869 	if (rc != 0) {
870 		/* stop and remove ports that have been added */
871 		for (i = 0; i < nadded; i++) {
872 			port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
873 			ASSERT(port != NULL);
874 			if (grp->lg_started) {
875 				mac_perim_enter_by_mh(port->lp_mh, &pmph);
876 				(void) aggr_port_promisc(port, B_FALSE);
877 				aggr_port_stop(port);
878 				mac_perim_exit(pmph);
879 			}
880 			aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
881 			(void) aggr_grp_rem_port(grp, port, NULL, NULL);
882 		}
883 	}
884 
885 	mac_perim_exit(mph);
886 	AGGR_GRP_REFRELE(grp);
887 	return (rc);
888 }
889 
890 static int
891 aggr_grp_modify_common(aggr_grp_t *grp, uint8_t update_mask, uint32_t policy,
892     boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode,
893     aggr_lacp_timer_t lacp_timer)
894 {
895 	boolean_t mac_addr_changed = B_FALSE;
896 	boolean_t link_state_changed = B_FALSE;
897 	mac_perim_handle_t pmph;
898 
899 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
900 
901 	/* validate fixed address if specified */
902 	if ((update_mask & AGGR_MODIFY_MAC) && mac_fixed &&
903 	    ((bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) ||
904 	    (mac_addr[0] & 0x01))) {
905 		return (EINVAL);
906 	}
907 
908 	/* update policy if requested */
909 	if (update_mask & AGGR_MODIFY_POLICY)
910 		aggr_send_update_policy(grp, policy);
911 
912 	/* update unicast MAC address if requested */
913 	if (update_mask & AGGR_MODIFY_MAC) {
914 		if (mac_fixed) {
915 			/* user-supplied MAC address */
916 			grp->lg_mac_addr_port = NULL;
917 			if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) != 0) {
918 				bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
919 				mac_addr_changed = B_TRUE;
920 			}
921 		} else if (grp->lg_addr_fixed) {
922 			/* switch from user-supplied to automatic */
923 			aggr_port_t *port = grp->lg_ports;
924 
925 			mac_perim_enter_by_mh(port->lp_mh, &pmph);
926 			bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL);
927 			grp->lg_mac_addr_port = port;
928 			mac_addr_changed = B_TRUE;
929 			mac_perim_exit(pmph);
930 		}
931 		grp->lg_addr_fixed = mac_fixed;
932 	}
933 
934 	if (mac_addr_changed)
935 		link_state_changed = aggr_grp_update_ports_mac(grp);
936 
937 	if (update_mask & AGGR_MODIFY_LACP_MODE)
938 		aggr_lacp_update_mode(grp, lacp_mode);
939 
940 	if (update_mask & AGGR_MODIFY_LACP_TIMER)
941 		aggr_lacp_update_timer(grp, lacp_timer);
942 
943 	if (link_state_changed)
944 		mac_link_update(grp->lg_mh, grp->lg_link_state);
945 
946 	if (mac_addr_changed)
947 		mac_unicst_update(grp->lg_mh, grp->lg_addr);
948 
949 	return (0);
950 }
951 
952 /*
953  * Update properties of an existing link aggregation group.
954  */
955 int
956 aggr_grp_modify(datalink_id_t linkid, uint8_t update_mask, uint32_t policy,
957     boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode,
958     aggr_lacp_timer_t lacp_timer)
959 {
960 	aggr_grp_t *grp = NULL;
961 	mac_perim_handle_t mph;
962 	int err;
963 
964 	/* get group corresponding to linkid */
965 	rw_enter(&aggr_grp_lock, RW_READER);
966 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
967 	    (mod_hash_val_t *)&grp) != 0) {
968 		rw_exit(&aggr_grp_lock);
969 		return (ENOENT);
970 	}
971 	AGGR_GRP_REFHOLD(grp);
972 
973 	/*
974 	 * Hold the perimeter so that the aggregation won't be destroyed.
975 	 */
976 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
977 	rw_exit(&aggr_grp_lock);
978 
979 	err = aggr_grp_modify_common(grp, update_mask, policy, mac_fixed,
980 	    mac_addr, lacp_mode, lacp_timer);
981 
982 	mac_perim_exit(mph);
983 	AGGR_GRP_REFRELE(grp);
984 	return (err);
985 }
986 
987 /*
988  * Create a new link aggregation group upon request from administrator.
989  * Returns 0 on success, an errno on failure.
990  */
991 int
992 aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports,
993     laioc_port_t *ports, uint32_t policy, boolean_t mac_fixed, boolean_t force,
994     uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer,
995     cred_t *credp)
996 {
997 	aggr_grp_t *grp = NULL;
998 	aggr_port_t *port;
999 	mac_register_t *mac;
1000 	boolean_t link_state_changed;
1001 	mac_perim_handle_t mph;
1002 	int err;
1003 	int i;
1004 
1005 	/* need at least one port */
1006 	if (nports == 0)
1007 		return (EINVAL);
1008 
1009 	rw_enter(&aggr_grp_lock, RW_WRITER);
1010 
1011 	/* does a group with the same linkid already exist? */
1012 	err = mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1013 	    (mod_hash_val_t *)&grp);
1014 	if (err == 0) {
1015 		rw_exit(&aggr_grp_lock);
1016 		return (EEXIST);
1017 	}
1018 
1019 	grp = kmem_cache_alloc(aggr_grp_cache, KM_SLEEP);
1020 
1021 	grp->lg_refs = 1;
1022 	grp->lg_closing = B_FALSE;
1023 	grp->lg_force = force;
1024 	grp->lg_linkid = linkid;
1025 	grp->lg_zoneid = crgetzoneid(credp);
1026 	grp->lg_ifspeed = 0;
1027 	grp->lg_link_state = LINK_STATE_UNKNOWN;
1028 	grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN;
1029 	grp->lg_started = B_FALSE;
1030 	grp->lg_promisc = B_FALSE;
1031 	grp->lg_lacp_done = B_FALSE;
1032 	grp->lg_lacp_head = grp->lg_lacp_tail = NULL;
1033 	grp->lg_lacp_rx_thread = thread_create(NULL, 0,
1034 	    aggr_lacp_rx_thread, grp, 0, &p0, TS_RUN, minclsyspri);
1035 	bzero(&grp->lg_rx_group, sizeof (aggr_pseudo_rx_group_t));
1036 	aggr_lacp_init_grp(grp);
1037 
1038 	/* add MAC ports to group */
1039 	grp->lg_ports = NULL;
1040 	grp->lg_nports = 0;
1041 	grp->lg_nattached_ports = 0;
1042 	grp->lg_ntx_ports = 0;
1043 
1044 	/*
1045 	 * If key is not specified by the user, allocate the key.
1046 	 */
1047 	if ((key == 0) && ((key = (uint32_t)id_alloc(key_ids)) == 0)) {
1048 		err = ENOMEM;
1049 		goto bail;
1050 	}
1051 	grp->lg_key = key;
1052 
1053 	for (i = 0; i < nports; i++) {
1054 		err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, NULL);
1055 		if (err != 0)
1056 			goto bail;
1057 	}
1058 
1059 	/*
1060 	 * If no explicit MAC address was specified by the administrator,
1061 	 * set it to the MAC address of the first port.
1062 	 */
1063 	grp->lg_addr_fixed = mac_fixed;
1064 	if (grp->lg_addr_fixed) {
1065 		/* validate specified address */
1066 		if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) {
1067 			err = EINVAL;
1068 			goto bail;
1069 		}
1070 		bcopy(mac_addr, grp->lg_addr, ETHERADDRL);
1071 	} else {
1072 		bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
1073 		grp->lg_mac_addr_port = grp->lg_ports;
1074 	}
1075 
1076 	/* set the initial group capabilities */
1077 	aggr_grp_capab_set(grp);
1078 
1079 	if ((mac = mac_alloc(MAC_VERSION)) == NULL) {
1080 		err = ENOMEM;
1081 		goto bail;
1082 	}
1083 	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1084 	mac->m_driver = grp;
1085 	mac->m_dip = aggr_dip;
1086 	mac->m_instance = grp->lg_key > AGGR_MAX_KEY ? (uint_t)-1 : grp->lg_key;
1087 	mac->m_src_addr = grp->lg_addr;
1088 	mac->m_callbacks = &aggr_m_callbacks;
1089 	mac->m_min_sdu = 0;
1090 	mac->m_max_sdu = grp->lg_max_sdu = aggr_grp_max_sdu(grp);
1091 	mac->m_margin = aggr_grp_max_margin(grp);
1092 	mac->m_v12n = MAC_VIRT_LEVEL1;
1093 	err = mac_register(mac, &grp->lg_mh);
1094 	mac_free(mac);
1095 	if (err != 0)
1096 		goto bail;
1097 
1098 	err = dls_devnet_create(grp->lg_mh, grp->lg_linkid, crgetzoneid(credp));
1099 	if (err != 0) {
1100 		(void) mac_unregister(grp->lg_mh);
1101 		grp->lg_mh = NULL;
1102 		goto bail;
1103 	}
1104 
1105 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1106 
1107 	/*
1108 	 * Update the MAC address of the constituent ports.
1109 	 * None of the port is attached at this time, the link state of the
1110 	 * aggregation will not change.
1111 	 */
1112 	link_state_changed = aggr_grp_update_ports_mac(grp);
1113 	ASSERT(!link_state_changed);
1114 
1115 	/* update outbound load balancing policy */
1116 	aggr_send_update_policy(grp, policy);
1117 
1118 	/* set LACP mode */
1119 	aggr_lacp_set_mode(grp, lacp_mode, lacp_timer);
1120 
1121 	/*
1122 	 * Attach each port if necessary.
1123 	 */
1124 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1125 		/*
1126 		 * Create the pseudo ring for each HW ring of the underlying
1127 		 * port. Note that this is done after the aggr registers the
1128 		 * mac.
1129 		 */
1130 		VERIFY(aggr_add_pseudo_rx_group(port, &grp->lg_rx_group) == 0);
1131 		if (aggr_port_notify_link(grp, port))
1132 			link_state_changed = B_TRUE;
1133 
1134 		/*
1135 		 * Initialize the callback functions for this port.
1136 		 */
1137 		aggr_port_init_callbacks(port);
1138 	}
1139 
1140 	if (link_state_changed)
1141 		mac_link_update(grp->lg_mh, grp->lg_link_state);
1142 
1143 	/* add new group to hash table */
1144 	err = mod_hash_insert(aggr_grp_hash, GRP_HASH_KEY(linkid),
1145 	    (mod_hash_val_t)grp);
1146 	ASSERT(err == 0);
1147 	aggr_grp_cnt++;
1148 
1149 	mac_perim_exit(mph);
1150 	rw_exit(&aggr_grp_lock);
1151 	return (0);
1152 
1153 bail:
1154 
1155 	grp->lg_closing = B_TRUE;
1156 
1157 	port = grp->lg_ports;
1158 	while (port != NULL) {
1159 		aggr_port_t *cport;
1160 
1161 		cport = port->lp_next;
1162 		aggr_port_delete(port);
1163 		port = cport;
1164 	}
1165 
1166 	/*
1167 	 * Inform the lacp_rx thread to exit.
1168 	 */
1169 	mutex_enter(&grp->lg_lacp_lock);
1170 	grp->lg_lacp_done = B_TRUE;
1171 	cv_signal(&grp->lg_lacp_cv);
1172 	while (grp->lg_lacp_rx_thread != NULL)
1173 		cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
1174 	mutex_exit(&grp->lg_lacp_lock);
1175 
1176 	rw_exit(&aggr_grp_lock);
1177 	AGGR_GRP_REFRELE(grp);
1178 	return (err);
1179 }
1180 
1181 /*
1182  * Return a pointer to the member of a group with specified linkid.
1183  */
1184 static aggr_port_t *
1185 aggr_grp_port_lookup(aggr_grp_t *grp, datalink_id_t linkid)
1186 {
1187 	aggr_port_t *port;
1188 
1189 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1190 
1191 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1192 		if (port->lp_linkid == linkid)
1193 			break;
1194 	}
1195 
1196 	return (port);
1197 }
1198 
1199 /*
1200  * Stop, detach and remove a port from a link aggregation group.
1201  */
1202 static int
1203 aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port,
1204     boolean_t *mac_addr_changedp, boolean_t *link_state_changedp)
1205 {
1206 	int rc = 0;
1207 	aggr_port_t **pport;
1208 	boolean_t mac_addr_changed = B_FALSE;
1209 	boolean_t link_state_changed = B_FALSE;
1210 	mac_perim_handle_t mph;
1211 	uint64_t val;
1212 	uint_t i;
1213 	uint_t stat;
1214 
1215 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
1216 	ASSERT(grp->lg_nports > 1);
1217 	ASSERT(!grp->lg_closing);
1218 
1219 	/* unlink port */
1220 	for (pport = &grp->lg_ports; *pport != port;
1221 	    pport = &(*pport)->lp_next) {
1222 		if (*pport == NULL) {
1223 			rc = ENOENT;
1224 			goto done;
1225 		}
1226 	}
1227 	*pport = port->lp_next;
1228 
1229 	mac_perim_enter_by_mh(port->lp_mh, &mph);
1230 
1231 	/*
1232 	 * If the MAC address of the port being removed was assigned
1233 	 * to the group, update the group MAC address
1234 	 * using the MAC address of a different port.
1235 	 */
1236 	if (!grp->lg_addr_fixed && grp->lg_mac_addr_port == port) {
1237 		/*
1238 		 * Set the MAC address of the group to the
1239 		 * MAC address of its first port.
1240 		 */
1241 		bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL);
1242 		grp->lg_mac_addr_port = grp->lg_ports;
1243 		mac_addr_changed = B_TRUE;
1244 	}
1245 
1246 	link_state_changed = aggr_grp_detach_port(grp, port);
1247 
1248 	/*
1249 	 * Add the counter statistics of the ports while it was aggregated
1250 	 * to the group's residual statistics.  This is done by obtaining
1251 	 * the current counter from the underlying MAC then subtracting the
1252 	 * value of the counter at the moment it was added to the
1253 	 * aggregation.
1254 	 */
1255 	for (i = 0; i < MAC_NSTAT; i++) {
1256 		stat = i + MAC_STAT_MIN;
1257 		if (!MAC_STAT_ISACOUNTER(stat))
1258 			continue;
1259 		val = aggr_port_stat(port, stat);
1260 		val -= port->lp_stat[i];
1261 		grp->lg_stat[i] += val;
1262 	}
1263 	for (i = 0; i < ETHER_NSTAT; i++) {
1264 		stat = i + MACTYPE_STAT_MIN;
1265 		if (!ETHER_STAT_ISACOUNTER(stat))
1266 			continue;
1267 		val = aggr_port_stat(port, stat);
1268 		val -= port->lp_ether_stat[i];
1269 		grp->lg_ether_stat[i] += val;
1270 	}
1271 
1272 	grp->lg_nports--;
1273 	mac_perim_exit(mph);
1274 
1275 	aggr_port_delete(port);
1276 
1277 	/*
1278 	 * If the group MAC address has changed, update the MAC address of
1279 	 * the remaining constituent ports according to the new MAC
1280 	 * address of the group.
1281 	 */
1282 	if (mac_addr_changed && aggr_grp_update_ports_mac(grp))
1283 		link_state_changed = B_TRUE;
1284 
1285 done:
1286 	if (mac_addr_changedp != NULL)
1287 		*mac_addr_changedp = mac_addr_changed;
1288 	if (link_state_changedp != NULL)
1289 		*link_state_changedp = link_state_changed;
1290 
1291 	return (rc);
1292 }
1293 
1294 /*
1295  * Remove one or more ports from an existing link aggregation group.
1296  */
1297 int
1298 aggr_grp_rem_ports(datalink_id_t linkid, uint_t nports, laioc_port_t *ports)
1299 {
1300 	int rc = 0, i;
1301 	aggr_grp_t *grp = NULL;
1302 	aggr_port_t *port;
1303 	boolean_t mac_addr_update = B_FALSE, mac_addr_changed;
1304 	boolean_t link_state_update = B_FALSE, link_state_changed;
1305 	mac_perim_handle_t mph, pmph;
1306 
1307 	/* get group corresponding to linkid */
1308 	rw_enter(&aggr_grp_lock, RW_READER);
1309 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1310 	    (mod_hash_val_t *)&grp) != 0) {
1311 		rw_exit(&aggr_grp_lock);
1312 		return (ENOENT);
1313 	}
1314 	AGGR_GRP_REFHOLD(grp);
1315 
1316 	/*
1317 	 * Hold the perimeter so that the aggregation won't be destroyed.
1318 	 */
1319 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1320 	rw_exit(&aggr_grp_lock);
1321 
1322 	/* we need to keep at least one port per group */
1323 	if (nports >= grp->lg_nports) {
1324 		rc = EINVAL;
1325 		goto bail;
1326 	}
1327 
1328 	/* first verify that all the groups are valid */
1329 	for (i = 0; i < nports; i++) {
1330 		if (aggr_grp_port_lookup(grp, ports[i].lp_linkid) == NULL) {
1331 			/* port not found */
1332 			rc = ENOENT;
1333 			goto bail;
1334 		}
1335 	}
1336 
1337 	/* clear the promiscous mode for the specified ports */
1338 	for (i = 0; i < nports && rc == 0; i++) {
1339 		/* lookup port */
1340 		port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
1341 		ASSERT(port != NULL);
1342 
1343 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
1344 		rc = aggr_port_promisc(port, B_FALSE);
1345 		mac_perim_exit(pmph);
1346 	}
1347 	if (rc != 0) {
1348 		for (i = 0; i < nports; i++) {
1349 			port = aggr_grp_port_lookup(grp,
1350 			    ports[i].lp_linkid);
1351 			ASSERT(port != NULL);
1352 
1353 			/*
1354 			 * Turn the promiscuous mode back on if it is required
1355 			 * to receive the non-primary address over a port, or
1356 			 * the promiscous mode is enabled over the aggr.
1357 			 */
1358 			mac_perim_enter_by_mh(port->lp_mh, &pmph);
1359 			if (port->lp_started && (grp->lg_promisc ||
1360 			    port->lp_prom_addr != NULL)) {
1361 				(void) aggr_port_promisc(port, B_TRUE);
1362 			}
1363 			mac_perim_exit(pmph);
1364 		}
1365 		goto bail;
1366 	}
1367 
1368 	/* remove the specified ports from group */
1369 	for (i = 0; i < nports; i++) {
1370 		/* lookup port */
1371 		port = aggr_grp_port_lookup(grp, ports[i].lp_linkid);
1372 		ASSERT(port != NULL);
1373 
1374 		/* stop port if group has already been started */
1375 		if (grp->lg_started) {
1376 			mac_perim_enter_by_mh(port->lp_mh, &pmph);
1377 			aggr_port_stop(port);
1378 			mac_perim_exit(pmph);
1379 		}
1380 
1381 		aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
1382 		/* remove port from group */
1383 		rc = aggr_grp_rem_port(grp, port, &mac_addr_changed,
1384 		    &link_state_changed);
1385 		ASSERT(rc == 0);
1386 		mac_addr_update = mac_addr_update || mac_addr_changed;
1387 		link_state_update = link_state_update || link_state_changed;
1388 	}
1389 
1390 bail:
1391 	if (mac_addr_update)
1392 		mac_unicst_update(grp->lg_mh, grp->lg_addr);
1393 	if (link_state_update)
1394 		mac_link_update(grp->lg_mh, grp->lg_link_state);
1395 
1396 	mac_perim_exit(mph);
1397 	AGGR_GRP_REFRELE(grp);
1398 
1399 	return (rc);
1400 }
1401 
1402 int
1403 aggr_grp_delete(datalink_id_t linkid, cred_t *cred)
1404 {
1405 	aggr_grp_t *grp = NULL;
1406 	aggr_port_t *port, *cport;
1407 	datalink_id_t tmpid;
1408 	mod_hash_val_t val;
1409 	mac_perim_handle_t mph, pmph;
1410 	int err;
1411 
1412 	rw_enter(&aggr_grp_lock, RW_WRITER);
1413 
1414 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1415 	    (mod_hash_val_t *)&grp) != 0) {
1416 		rw_exit(&aggr_grp_lock);
1417 		return (ENOENT);
1418 	}
1419 
1420 	/*
1421 	 * Note that dls_devnet_destroy() must be called before lg_lock is
1422 	 * held. Otherwise, it will deadlock if another thread is in
1423 	 * aggr_m_stat() and thus has a kstat_hold() on the kstats that
1424 	 * dls_devnet_destroy() needs to delete.
1425 	 */
1426 	if ((err = dls_devnet_destroy(grp->lg_mh, &tmpid, B_TRUE)) != 0) {
1427 		rw_exit(&aggr_grp_lock);
1428 		return (err);
1429 	}
1430 	ASSERT(linkid == tmpid);
1431 
1432 	/*
1433 	 * Unregister from the MAC service module. Since this can
1434 	 * fail if a client hasn't closed the MAC port, we gracefully
1435 	 * fail the operation.
1436 	 */
1437 	if ((err = mac_disable(grp->lg_mh)) != 0) {
1438 		(void) dls_devnet_create(grp->lg_mh, linkid, crgetzoneid(cred));
1439 		rw_exit(&aggr_grp_lock);
1440 		return (err);
1441 	}
1442 	(void) mod_hash_remove(aggr_grp_hash, GRP_HASH_KEY(linkid), &val);
1443 	ASSERT(grp == (aggr_grp_t *)val);
1444 
1445 	ASSERT(aggr_grp_cnt > 0);
1446 	aggr_grp_cnt--;
1447 	rw_exit(&aggr_grp_lock);
1448 
1449 	/*
1450 	 * Inform the lacp_rx thread to exit.
1451 	 */
1452 	mutex_enter(&grp->lg_lacp_lock);
1453 	grp->lg_lacp_done = B_TRUE;
1454 	cv_signal(&grp->lg_lacp_cv);
1455 	while (grp->lg_lacp_rx_thread != NULL)
1456 		cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock);
1457 	mutex_exit(&grp->lg_lacp_lock);
1458 
1459 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1460 
1461 	grp->lg_closing = B_TRUE;
1462 	/* detach and free MAC ports associated with group */
1463 	port = grp->lg_ports;
1464 	while (port != NULL) {
1465 		cport = port->lp_next;
1466 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
1467 		if (grp->lg_started)
1468 			aggr_port_stop(port);
1469 		(void) aggr_grp_detach_port(grp, port);
1470 		mac_perim_exit(pmph);
1471 		aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group);
1472 		aggr_port_delete(port);
1473 		port = cport;
1474 	}
1475 
1476 	mac_perim_exit(mph);
1477 
1478 	/*
1479 	 * Wait for the port's lacp timer thread and its notification callback
1480 	 * to exit before calling mac_unregister() since both needs to access
1481 	 * the mac perimeter of the grp.
1482 	 */
1483 	aggr_grp_port_wait(grp);
1484 
1485 	VERIFY(mac_unregister(grp->lg_mh) == 0);
1486 	grp->lg_mh = NULL;
1487 
1488 	AGGR_GRP_REFRELE(grp);
1489 	return (0);
1490 }
1491 
1492 void
1493 aggr_grp_free(aggr_grp_t *grp)
1494 {
1495 	ASSERT(grp->lg_refs == 0);
1496 	ASSERT(grp->lg_port_ref == 0);
1497 	if (grp->lg_key > AGGR_MAX_KEY) {
1498 		id_free(key_ids, grp->lg_key);
1499 		grp->lg_key = 0;
1500 	}
1501 	kmem_cache_free(aggr_grp_cache, grp);
1502 }
1503 
1504 int
1505 aggr_grp_info(datalink_id_t linkid, void *fn_arg,
1506     aggr_grp_info_new_grp_fn_t new_grp_fn,
1507     aggr_grp_info_new_port_fn_t new_port_fn, cred_t *cred)
1508 {
1509 	aggr_grp_t	*grp;
1510 	aggr_port_t	*port;
1511 	mac_perim_handle_t mph, pmph;
1512 	int		rc = 0;
1513 
1514 	/*
1515 	 * Make sure that the aggregation link is visible from the caller's
1516 	 * zone.
1517 	 */
1518 	if (!dls_devnet_islinkvisible(linkid, crgetzoneid(cred)))
1519 		return (ENOENT);
1520 
1521 	rw_enter(&aggr_grp_lock, RW_READER);
1522 
1523 	if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid),
1524 	    (mod_hash_val_t *)&grp) != 0) {
1525 		rw_exit(&aggr_grp_lock);
1526 		return (ENOENT);
1527 	}
1528 	AGGR_GRP_REFHOLD(grp);
1529 
1530 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1531 	rw_exit(&aggr_grp_lock);
1532 
1533 	rc = new_grp_fn(fn_arg, grp->lg_linkid,
1534 	    (grp->lg_key > AGGR_MAX_KEY) ? 0 : grp->lg_key, grp->lg_addr,
1535 	    grp->lg_addr_fixed, grp->lg_force, grp->lg_tx_policy,
1536 	    grp->lg_nports, grp->lg_lacp_mode, grp->aggr.PeriodicTimer);
1537 
1538 	if (rc != 0)
1539 		goto bail;
1540 
1541 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1542 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
1543 		rc = new_port_fn(fn_arg, port->lp_linkid, port->lp_addr,
1544 		    port->lp_state, &port->lp_lacp.ActorOperPortState);
1545 		mac_perim_exit(pmph);
1546 
1547 		if (rc != 0)
1548 			goto bail;
1549 	}
1550 
1551 bail:
1552 	mac_perim_exit(mph);
1553 	AGGR_GRP_REFRELE(grp);
1554 	return (rc);
1555 }
1556 
1557 /*ARGSUSED*/
1558 static void
1559 aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
1560 {
1561 	miocnak(q, mp, 0, ENOTSUP);
1562 }
1563 
1564 static int
1565 aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val)
1566 {
1567 	aggr_port_t	*port;
1568 	uint_t		stat_index;
1569 
1570 	/* We only aggregate counter statistics. */
1571 	if (IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat) ||
1572 	    IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat)) {
1573 		return (ENOTSUP);
1574 	}
1575 
1576 	/*
1577 	 * Counter statistics for a group are computed by aggregating the
1578 	 * counters of the members MACs while they were aggregated, plus
1579 	 * the residual counter of the group itself, which is updated each
1580 	 * time a MAC is removed from the group.
1581 	 */
1582 	*val = 0;
1583 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1584 		/* actual port statistic */
1585 		*val += aggr_port_stat(port, stat);
1586 		/*
1587 		 * minus the port stat when it was added, plus any residual
1588 		 * amount for the group.
1589 		 */
1590 		if (IS_MAC_STAT(stat)) {
1591 			stat_index = stat - MAC_STAT_MIN;
1592 			*val -= port->lp_stat[stat_index];
1593 			*val += grp->lg_stat[stat_index];
1594 		} else if (IS_MACTYPE_STAT(stat)) {
1595 			stat_index = stat - MACTYPE_STAT_MIN;
1596 			*val -= port->lp_ether_stat[stat_index];
1597 			*val += grp->lg_ether_stat[stat_index];
1598 		}
1599 	}
1600 	return (0);
1601 }
1602 
1603 static int
1604 aggr_m_stat(void *arg, uint_t stat, uint64_t *val)
1605 {
1606 	aggr_grp_t		*grp = arg;
1607 	mac_perim_handle_t	mph;
1608 	int			rval = 0;
1609 
1610 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1611 
1612 	switch (stat) {
1613 	case MAC_STAT_IFSPEED:
1614 		*val = grp->lg_ifspeed;
1615 		break;
1616 
1617 	case ETHER_STAT_LINK_DUPLEX:
1618 		*val = grp->lg_link_duplex;
1619 		break;
1620 
1621 	default:
1622 		/*
1623 		 * For all other statistics, we return the aggregated stat
1624 		 * from the underlying ports.  aggr_grp_stat() will set
1625 		 * rval appropriately if the statistic isn't a counter.
1626 		 */
1627 		rval = aggr_grp_stat(grp, stat, val);
1628 	}
1629 
1630 	mac_perim_exit(mph);
1631 	return (rval);
1632 }
1633 
1634 static int
1635 aggr_m_start(void *arg)
1636 {
1637 	aggr_grp_t *grp = arg;
1638 	aggr_port_t *port;
1639 	mac_perim_handle_t mph, pmph;
1640 
1641 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1642 
1643 	/*
1644 	 * Attempts to start all configured members of the group.
1645 	 * Group members will be attached when their link-up notification
1646 	 * is received.
1647 	 */
1648 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1649 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
1650 		if (aggr_port_start(port) != 0) {
1651 			mac_perim_exit(pmph);
1652 			continue;
1653 		}
1654 
1655 		/*
1656 		 * Turn on the promiscuous mode if it is required to receive
1657 		 * the non-primary address over a port, or the promiscous
1658 		 * mode is enabled over the aggr.
1659 		 */
1660 		if (grp->lg_promisc || port->lp_prom_addr != NULL) {
1661 			if (aggr_port_promisc(port, B_TRUE) != 0)
1662 				aggr_port_stop(port);
1663 		}
1664 		mac_perim_exit(pmph);
1665 	}
1666 
1667 	grp->lg_started = B_TRUE;
1668 
1669 	mac_perim_exit(mph);
1670 	return (0);
1671 }
1672 
1673 static void
1674 aggr_m_stop(void *arg)
1675 {
1676 	aggr_grp_t *grp = arg;
1677 	aggr_port_t *port;
1678 	mac_perim_handle_t mph, pmph;
1679 
1680 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1681 
1682 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1683 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
1684 
1685 		/* reset port promiscuous mode */
1686 		(void) aggr_port_promisc(port, B_FALSE);
1687 
1688 		aggr_port_stop(port);
1689 		mac_perim_exit(pmph);
1690 	}
1691 
1692 	grp->lg_started = B_FALSE;
1693 	mac_perim_exit(mph);
1694 }
1695 
1696 static int
1697 aggr_m_promisc(void *arg, boolean_t on)
1698 {
1699 	aggr_grp_t *grp = arg;
1700 	aggr_port_t *port;
1701 	boolean_t link_state_changed = B_FALSE;
1702 	mac_perim_handle_t mph, pmph;
1703 
1704 	AGGR_GRP_REFHOLD(grp);
1705 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1706 
1707 	ASSERT(!grp->lg_closing);
1708 
1709 	if (on == grp->lg_promisc)
1710 		goto bail;
1711 
1712 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
1713 		int	err = 0;
1714 
1715 		mac_perim_enter_by_mh(port->lp_mh, &pmph);
1716 		AGGR_PORT_REFHOLD(port);
1717 		if (!on && (port->lp_prom_addr == NULL))
1718 			err = aggr_port_promisc(port, B_FALSE);
1719 		else if (on && port->lp_started)
1720 			err = aggr_port_promisc(port, B_TRUE);
1721 
1722 		if (err != 0) {
1723 			if (aggr_grp_detach_port(grp, port))
1724 				link_state_changed = B_TRUE;
1725 		} else {
1726 			/*
1727 			 * If a port was detached because of a previous
1728 			 * failure changing the promiscuity, the port
1729 			 * is reattached when it successfully changes
1730 			 * the promiscuity now, and this might cause
1731 			 * the link state of the aggregation to change.
1732 			 */
1733 			if (aggr_grp_attach_port(grp, port))
1734 				link_state_changed = B_TRUE;
1735 		}
1736 		mac_perim_exit(pmph);
1737 		AGGR_PORT_REFRELE(port);
1738 	}
1739 
1740 	grp->lg_promisc = on;
1741 
1742 	if (link_state_changed)
1743 		mac_link_update(grp->lg_mh, grp->lg_link_state);
1744 
1745 bail:
1746 	mac_perim_exit(mph);
1747 	AGGR_GRP_REFRELE(grp);
1748 
1749 	return (0);
1750 }
1751 
1752 static void
1753 aggr_grp_port_rename(const char *new_name, void *arg)
1754 {
1755 	/*
1756 	 * aggr port's mac client name is the format of "aggr link name" plus
1757 	 * AGGR_PORT_NAME_DELIMIT plus "underneath link name".
1758 	 */
1759 	int aggr_len, link_len, clnt_name_len, i;
1760 	char *str_end, *str_st, *str_del;
1761 	char aggr_name[MAXNAMELEN];
1762 	char link_name[MAXNAMELEN];
1763 	char *clnt_name;
1764 	aggr_grp_t *aggr_grp = arg;
1765 	aggr_port_t *aggr_port = aggr_grp->lg_ports;
1766 
1767 	for (i = 0; i < aggr_grp->lg_nports; i++) {
1768 		clnt_name = mac_client_name(aggr_port->lp_mch);
1769 		clnt_name_len = strlen(clnt_name);
1770 		str_st = clnt_name;
1771 		str_end = &(clnt_name[clnt_name_len]);
1772 		str_del = strchr(str_st, AGGR_PORT_NAME_DELIMIT);
1773 		ASSERT(str_del != NULL);
1774 		aggr_len = (intptr_t)((uintptr_t)str_del - (uintptr_t)str_st);
1775 		link_len = (intptr_t)((uintptr_t)str_end - (uintptr_t)str_del);
1776 		bzero(aggr_name, MAXNAMELEN);
1777 		bzero(link_name, MAXNAMELEN);
1778 		bcopy(clnt_name, aggr_name, aggr_len);
1779 		bcopy(str_del, link_name, link_len + 1);
1780 		bzero(clnt_name, MAXNAMELEN);
1781 		(void) snprintf(clnt_name, MAXNAMELEN, "%s%s", new_name,
1782 		    link_name);
1783 
1784 		(void) mac_rename_primary(aggr_port->lp_mh, NULL);
1785 		aggr_port = aggr_port->lp_next;
1786 	}
1787 }
1788 
1789 /*
1790  * Initialize the capabilities that are advertised for the group
1791  * according to the capabilities of the constituent ports.
1792  */
1793 static boolean_t
1794 aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
1795 {
1796 	aggr_grp_t *grp = arg;
1797 
1798 	switch (cap) {
1799 	case MAC_CAPAB_HCKSUM: {
1800 		uint32_t *hcksum_txflags = cap_data;
1801 		*hcksum_txflags = grp->lg_hcksum_txflags;
1802 		break;
1803 	}
1804 	case MAC_CAPAB_LSO: {
1805 		mac_capab_lso_t *cap_lso = cap_data;
1806 
1807 		if (grp->lg_lso) {
1808 			*cap_lso = grp->lg_cap_lso;
1809 			break;
1810 		} else {
1811 			return (B_FALSE);
1812 		}
1813 	}
1814 	case MAC_CAPAB_NO_NATIVEVLAN:
1815 		return (!grp->lg_vlan);
1816 	case MAC_CAPAB_NO_ZCOPY:
1817 		return (!grp->lg_zcopy);
1818 	case MAC_CAPAB_RINGS: {
1819 		mac_capab_rings_t *cap_rings = cap_data;
1820 
1821 		if (cap_rings->mr_type == MAC_RING_TYPE_RX) {
1822 			cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC;
1823 			cap_rings->mr_rnum = grp->lg_rx_group.arg_ring_cnt;
1824 			cap_rings->mr_rget = aggr_fill_ring;
1825 
1826 			/*
1827 			 * An aggregation advertises only one (pseudo) RX
1828 			 * group, which virtualizes the main/primary group of
1829 			 * the underlying devices.
1830 			 */
1831 			cap_rings->mr_gnum = 1;
1832 			cap_rings->mr_gget = aggr_fill_group;
1833 			cap_rings->mr_gaddring = NULL;
1834 			cap_rings->mr_gremring = NULL;
1835 		} else {
1836 			return (B_FALSE);
1837 		}
1838 		break;
1839 	}
1840 	case MAC_CAPAB_AGGR:
1841 	{
1842 		mac_capab_aggr_t *aggr_cap;
1843 
1844 		if (cap_data != NULL) {
1845 			aggr_cap = cap_data;
1846 			aggr_cap->mca_rename_fn = aggr_grp_port_rename;
1847 			aggr_cap->mca_unicst = aggr_m_unicst;
1848 		}
1849 		return (B_TRUE);
1850 	}
1851 	default:
1852 		return (B_FALSE);
1853 	}
1854 	return (B_TRUE);
1855 }
1856 
1857 /*
1858  * Callback funtion for MAC layer to register groups.
1859  */
1860 static void
1861 aggr_fill_group(void *arg, mac_ring_type_t rtype, const int index,
1862     mac_group_info_t *infop, mac_group_handle_t gh)
1863 {
1864 	aggr_grp_t *grp = arg;
1865 	aggr_pseudo_rx_group_t *rx_group;
1866 
1867 	ASSERT(rtype == MAC_RING_TYPE_RX && index == 0);
1868 	rx_group = &grp->lg_rx_group;
1869 	rx_group->arg_gh = gh;
1870 	rx_group->arg_grp = grp;
1871 
1872 	infop->mgi_driver = (mac_group_driver_t)rx_group;
1873 	infop->mgi_start = NULL;
1874 	infop->mgi_stop = NULL;
1875 	infop->mgi_addmac = aggr_addmac;
1876 	infop->mgi_remmac = aggr_remmac;
1877 	infop->mgi_count = rx_group->arg_ring_cnt;
1878 }
1879 
1880 /*
1881  * Callback funtion for MAC layer to register all rings.
1882  */
1883 static void
1884 aggr_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index,
1885     const int index, mac_ring_info_t *infop, mac_ring_handle_t rh)
1886 {
1887 	aggr_grp_t	*grp = arg;
1888 
1889 	switch (rtype) {
1890 	case MAC_RING_TYPE_RX: {
1891 		aggr_pseudo_rx_group_t	*rx_group = &grp->lg_rx_group;
1892 		aggr_pseudo_rx_ring_t	*rx_ring;
1893 		mac_intr_t		aggr_mac_intr;
1894 
1895 		ASSERT(rg_index == 0);
1896 
1897 		ASSERT((index >= 0) && (index < rx_group->arg_ring_cnt));
1898 		rx_ring = rx_group->arg_rings + index;
1899 		rx_ring->arr_rh = rh;
1900 
1901 		/*
1902 		 * Entrypoint to enable interrupt (disable poll) and
1903 		 * disable interrupt (enable poll).
1904 		 */
1905 		aggr_mac_intr.mi_handle = (mac_intr_handle_t)rx_ring;
1906 		aggr_mac_intr.mi_enable = aggr_pseudo_enable_intr;
1907 		aggr_mac_intr.mi_disable = aggr_pseudo_disable_intr;
1908 
1909 		infop->mri_driver = (mac_ring_driver_t)rx_ring;
1910 		infop->mri_start = aggr_pseudo_start_ring;
1911 		infop->mri_stop = aggr_pseudo_stop_ring;
1912 
1913 		infop->mri_intr = aggr_mac_intr;
1914 		infop->mri_poll = aggr_rx_poll;
1915 		break;
1916 	}
1917 	default:
1918 		break;
1919 	}
1920 }
1921 
1922 static mblk_t *
1923 aggr_rx_poll(void *arg, int bytes_to_pickup)
1924 {
1925 	aggr_pseudo_rx_ring_t *rr_ring = arg;
1926 	aggr_port_t *port = rr_ring->arr_port;
1927 	aggr_grp_t *grp = port->lp_grp;
1928 	mblk_t *mp_chain, *mp, **mpp;
1929 
1930 	mp_chain = mac_hwring_poll(rr_ring->arr_hw_rh, bytes_to_pickup);
1931 
1932 	if (grp->lg_lacp_mode == AGGR_LACP_OFF)
1933 		return (mp_chain);
1934 
1935 	mpp = &mp_chain;
1936 	while ((mp = *mpp) != NULL) {
1937 		if (MBLKL(mp) >= sizeof (struct ether_header)) {
1938 			struct ether_header *ehp;
1939 
1940 			ehp = (struct ether_header *)mp->b_rptr;
1941 			if (ntohs(ehp->ether_type) == ETHERTYPE_SLOW) {
1942 				*mpp = mp->b_next;
1943 				mp->b_next = NULL;
1944 				aggr_recv_lacp(port,
1945 				    (mac_resource_handle_t)rr_ring, mp);
1946 				continue;
1947 			}
1948 		}
1949 
1950 		if (!port->lp_collector_enabled) {
1951 			*mpp = mp->b_next;
1952 			mp->b_next = NULL;
1953 			freemsg(mp);
1954 			continue;
1955 		}
1956 		mpp = &mp->b_next;
1957 	}
1958 	return (mp_chain);
1959 }
1960 
1961 static int
1962 aggr_addmac(void *arg, const uint8_t *mac_addr)
1963 {
1964 	aggr_pseudo_rx_group_t	*rx_group = (aggr_pseudo_rx_group_t *)arg;
1965 	aggr_unicst_addr_t	*addr, **pprev;
1966 	aggr_grp_t		*grp = rx_group->arg_grp;
1967 	aggr_port_t		*port, *p;
1968 	mac_perim_handle_t	mph;
1969 	int			err = 0;
1970 
1971 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
1972 
1973 	if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) {
1974 		mac_perim_exit(mph);
1975 		return (0);
1976 	}
1977 
1978 	/*
1979 	 * Insert this mac address into the list of mac addresses owned by
1980 	 * the aggregation pseudo group.
1981 	 */
1982 	pprev = &rx_group->arg_macaddr;
1983 	while ((addr = *pprev) != NULL) {
1984 		if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) == 0) {
1985 			mac_perim_exit(mph);
1986 			return (EEXIST);
1987 		}
1988 		pprev = &addr->aua_next;
1989 	}
1990 	addr = kmem_alloc(sizeof (aggr_unicst_addr_t), KM_SLEEP);
1991 	bcopy(mac_addr, addr->aua_addr, ETHERADDRL);
1992 	addr->aua_next = NULL;
1993 	*pprev = addr;
1994 
1995 	for (port = grp->lg_ports; port != NULL; port = port->lp_next)
1996 		if ((err = aggr_port_addmac(port, mac_addr)) != 0)
1997 			break;
1998 
1999 	if (err != 0) {
2000 		for (p = grp->lg_ports; p != port; p = p->lp_next)
2001 			aggr_port_remmac(p, mac_addr);
2002 
2003 		*pprev = NULL;
2004 		kmem_free(addr, sizeof (aggr_unicst_addr_t));
2005 	}
2006 
2007 	mac_perim_exit(mph);
2008 	return (err);
2009 }
2010 
2011 static int
2012 aggr_remmac(void *arg, const uint8_t *mac_addr)
2013 {
2014 	aggr_pseudo_rx_group_t	*rx_group = (aggr_pseudo_rx_group_t *)arg;
2015 	aggr_unicst_addr_t	*addr, **pprev;
2016 	aggr_grp_t		*grp = rx_group->arg_grp;
2017 	aggr_port_t		*port;
2018 	mac_perim_handle_t	mph;
2019 	int			err = 0;
2020 
2021 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
2022 
2023 	if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) {
2024 		mac_perim_exit(mph);
2025 		return (0);
2026 	}
2027 
2028 	/*
2029 	 * Insert this mac address into the list of mac addresses owned by
2030 	 * the aggregation pseudo group.
2031 	 */
2032 	pprev = &rx_group->arg_macaddr;
2033 	while ((addr = *pprev) != NULL) {
2034 		if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) != 0) {
2035 			pprev = &addr->aua_next;
2036 			continue;
2037 		}
2038 		break;
2039 	}
2040 	if (addr == NULL) {
2041 		mac_perim_exit(mph);
2042 		return (EINVAL);
2043 	}
2044 
2045 	for (port = grp->lg_ports; port != NULL; port = port->lp_next)
2046 		aggr_port_remmac(port, mac_addr);
2047 
2048 	*pprev = addr->aua_next;
2049 	kmem_free(addr, sizeof (aggr_unicst_addr_t));
2050 
2051 	mac_perim_exit(mph);
2052 	return (err);
2053 }
2054 
2055 /*
2056  * Add or remove the multicast addresses that are defined for the group
2057  * to or from the specified port.
2058  *
2059  * Note that aggr_grp_multicst_port(..., B_TRUE) is called when the port
2060  * is started and attached, and aggr_grp_multicst_port(..., B_FALSE) is
2061  * called when the port is either stopped or detached.
2062  */
2063 void
2064 aggr_grp_multicst_port(aggr_port_t *port, boolean_t add)
2065 {
2066 	aggr_grp_t *grp = port->lp_grp;
2067 
2068 	ASSERT(MAC_PERIM_HELD(port->lp_mh));
2069 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
2070 
2071 	if (!port->lp_started || port->lp_state != AGGR_PORT_STATE_ATTACHED)
2072 		return;
2073 
2074 	mac_multicast_refresh(grp->lg_mh, aggr_port_multicst, port, add);
2075 }
2076 
2077 static int
2078 aggr_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
2079 {
2080 	aggr_grp_t *grp = arg;
2081 	aggr_port_t *port = NULL;
2082 	mac_perim_handle_t mph;
2083 	int err = 0, cerr;
2084 
2085 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
2086 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2087 		if (port->lp_state != AGGR_PORT_STATE_ATTACHED ||
2088 		    !port->lp_started) {
2089 			continue;
2090 		}
2091 		cerr = aggr_port_multicst(port, add, addrp);
2092 		if (cerr != 0 && err == 0)
2093 			err = cerr;
2094 	}
2095 	mac_perim_exit(mph);
2096 	return (err);
2097 }
2098 
2099 static int
2100 aggr_m_unicst(void *arg, const uint8_t *macaddr)
2101 {
2102 	aggr_grp_t *grp = arg;
2103 	mac_perim_handle_t mph;
2104 	int err;
2105 
2106 	mac_perim_enter_by_mh(grp->lg_mh, &mph);
2107 	err = aggr_grp_modify_common(grp, AGGR_MODIFY_MAC, 0, B_TRUE, macaddr,
2108 	    0, 0);
2109 	mac_perim_exit(mph);
2110 	return (err);
2111 }
2112 
2113 /*
2114  * Initialize the capabilities that are advertised for the group
2115  * according to the capabilities of the constituent ports.
2116  */
2117 static void
2118 aggr_grp_capab_set(aggr_grp_t *grp)
2119 {
2120 	uint32_t cksum;
2121 	aggr_port_t *port;
2122 	mac_capab_lso_t cap_lso;
2123 
2124 	ASSERT(grp->lg_mh == NULL);
2125 	ASSERT(grp->lg_ports != NULL);
2126 
2127 	grp->lg_hcksum_txflags = (uint32_t)-1;
2128 	grp->lg_zcopy = B_TRUE;
2129 	grp->lg_vlan = B_TRUE;
2130 
2131 	grp->lg_lso = B_TRUE;
2132 	grp->lg_cap_lso.lso_flags = (t_uscalar_t)-1;
2133 	grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max = (t_uscalar_t)-1;
2134 
2135 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2136 		if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &cksum))
2137 			cksum = 0;
2138 		grp->lg_hcksum_txflags &= cksum;
2139 
2140 		grp->lg_vlan &=
2141 		    !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL);
2142 
2143 		grp->lg_zcopy &=
2144 		    !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL);
2145 
2146 		grp->lg_lso &=
2147 		    mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso);
2148 		if (grp->lg_lso) {
2149 			grp->lg_cap_lso.lso_flags &= cap_lso.lso_flags;
2150 			if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max >
2151 			    cap_lso.lso_basic_tcp_ipv4.lso_max)
2152 				grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max =
2153 				    cap_lso.lso_basic_tcp_ipv4.lso_max;
2154 		}
2155 	}
2156 }
2157 
2158 /*
2159  * Checks whether the capabilities of the port being added are compatible
2160  * with the current capabilities of the aggregation.
2161  */
2162 static boolean_t
2163 aggr_grp_capab_check(aggr_grp_t *grp, aggr_port_t *port)
2164 {
2165 	uint32_t hcksum_txflags;
2166 
2167 	ASSERT(grp->lg_ports != NULL);
2168 
2169 	if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL)) &
2170 	    grp->lg_vlan) != grp->lg_vlan) {
2171 		return (B_FALSE);
2172 	}
2173 
2174 	if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL)) &
2175 	    grp->lg_zcopy) != grp->lg_zcopy) {
2176 		return (B_FALSE);
2177 	}
2178 
2179 	if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &hcksum_txflags)) {
2180 		if (grp->lg_hcksum_txflags != 0)
2181 			return (B_FALSE);
2182 	} else if ((hcksum_txflags & grp->lg_hcksum_txflags) !=
2183 	    grp->lg_hcksum_txflags) {
2184 		return (B_FALSE);
2185 	}
2186 
2187 	if (grp->lg_lso) {
2188 		mac_capab_lso_t cap_lso;
2189 
2190 		if (mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso)) {
2191 			if ((grp->lg_cap_lso.lso_flags & cap_lso.lso_flags) !=
2192 			    grp->lg_cap_lso.lso_flags)
2193 				return (B_FALSE);
2194 			if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max >
2195 			    cap_lso.lso_basic_tcp_ipv4.lso_max)
2196 				return (B_FALSE);
2197 		} else {
2198 			return (B_FALSE);
2199 		}
2200 	}
2201 
2202 	return (B_TRUE);
2203 }
2204 
2205 /*
2206  * Returns the maximum SDU according to the SDU of the constituent ports.
2207  */
2208 static uint_t
2209 aggr_grp_max_sdu(aggr_grp_t *grp)
2210 {
2211 	uint_t max_sdu = (uint_t)-1;
2212 	aggr_port_t *port;
2213 
2214 	ASSERT(grp->lg_ports != NULL);
2215 
2216 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2217 		uint_t port_sdu_max;
2218 
2219 		mac_sdu_get(port->lp_mh, NULL, &port_sdu_max);
2220 		if (max_sdu > port_sdu_max)
2221 			max_sdu = port_sdu_max;
2222 	}
2223 
2224 	return (max_sdu);
2225 }
2226 
2227 /*
2228  * Checks if the maximum SDU of the specified port is compatible
2229  * with the maximum SDU of the specified aggregation group, returns
2230  * B_TRUE if it is, B_FALSE otherwise.
2231  */
2232 static boolean_t
2233 aggr_grp_sdu_check(aggr_grp_t *grp, aggr_port_t *port)
2234 {
2235 	uint_t port_sdu_max;
2236 
2237 	mac_sdu_get(port->lp_mh, NULL, &port_sdu_max);
2238 	return (port_sdu_max >= grp->lg_max_sdu);
2239 }
2240 
2241 /*
2242  * Returns the maximum margin according to the margin of the constituent ports.
2243  */
2244 static uint32_t
2245 aggr_grp_max_margin(aggr_grp_t *grp)
2246 {
2247 	uint32_t margin = UINT32_MAX;
2248 	aggr_port_t *port;
2249 
2250 	ASSERT(grp->lg_mh == NULL);
2251 	ASSERT(grp->lg_ports != NULL);
2252 
2253 	for (port = grp->lg_ports; port != NULL; port = port->lp_next) {
2254 		if (margin > port->lp_margin)
2255 			margin = port->lp_margin;
2256 	}
2257 
2258 	grp->lg_margin = margin;
2259 	return (margin);
2260 }
2261 
2262 /*
2263  * Checks if the maximum margin of the specified port is compatible
2264  * with the maximum margin of the specified aggregation group, returns
2265  * B_TRUE if it is, B_FALSE otherwise.
2266  */
2267 static boolean_t
2268 aggr_grp_margin_check(aggr_grp_t *grp, aggr_port_t *port)
2269 {
2270 	if (port->lp_margin >= grp->lg_margin)
2271 		return (B_TRUE);
2272 
2273 	/*
2274 	 * See whether the current margin value is allowed to be changed to
2275 	 * the new value.
2276 	 */
2277 	if (!mac_margin_update(grp->lg_mh, port->lp_margin))
2278 		return (B_FALSE);
2279 
2280 	grp->lg_margin = port->lp_margin;
2281 	return (B_TRUE);
2282 }
2283 
2284 /*
2285  * Set MTU on individual ports of an aggregation group
2286  */
2287 static int
2288 aggr_set_port_sdu(aggr_grp_t *grp, aggr_port_t *port, uint32_t sdu,
2289     uint32_t *old_mtu)
2290 {
2291 	boolean_t 		removed = B_FALSE;
2292 	mac_perim_handle_t	mph;
2293 	mac_diag_t		diag;
2294 	int			err, rv, retry = 0;
2295 
2296 	if (port->lp_mah != NULL) {
2297 		(void) mac_unicast_remove(port->lp_mch, port->lp_mah);
2298 		port->lp_mah = NULL;
2299 		removed = B_TRUE;
2300 	}
2301 	err = mac_set_mtu(port->lp_mh, sdu, old_mtu);
2302 try_again:
2303 	if (removed && (rv = mac_unicast_add(port->lp_mch, NULL,
2304 	    MAC_UNICAST_PRIMARY | MAC_UNICAST_DISABLE_TX_VID_CHECK,
2305 	    &port->lp_mah, 0, &diag)) != 0) {
2306 		/*
2307 		 * following is a workaround for a bug in 'bge' driver.
2308 		 * See CR 6794654 for more information and this work around
2309 		 * will be removed once the CR is fixed.
2310 		 */
2311 		if (rv == EIO && retry++ < 3) {
2312 			delay(2 * hz);
2313 			goto try_again;
2314 		}
2315 		/*
2316 		 * if mac_unicast_add() failed while setting the MTU,
2317 		 * detach the port from the group.
2318 		 */
2319 		mac_perim_enter_by_mh(port->lp_mh, &mph);
2320 		(void) aggr_grp_detach_port(grp, port);
2321 		mac_perim_exit(mph);
2322 		cmn_err(CE_WARN, "Unable to restart the port %s while "
2323 		    "setting MTU. Detaching the port from the aggregation.",
2324 		    mac_client_name(port->lp_mch));
2325 	}
2326 	return (err);
2327 }
2328 
2329 static int
2330 aggr_sdu_update(aggr_grp_t *grp, uint32_t sdu)
2331 {
2332 	int			err = 0, i, rv;
2333 	aggr_port_t		*port;
2334 	uint32_t		*mtu;
2335 
2336 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
2337 
2338 	/*
2339 	 * If the MTU being set is equal to aggr group's maximum
2340 	 * allowable value, then there is nothing to change
2341 	 */
2342 	if (sdu == grp->lg_max_sdu)
2343 		return (0);
2344 
2345 	/* 0 is aggr group's min sdu */
2346 	if (sdu == 0)
2347 		return (EINVAL);
2348 
2349 	mtu = kmem_alloc(sizeof (uint32_t) * grp->lg_nports, KM_SLEEP);
2350 	for (port = grp->lg_ports, i = 0; port != NULL && err == 0;
2351 	    port = port->lp_next, i++) {
2352 		err = aggr_set_port_sdu(grp, port, sdu, mtu + i);
2353 	}
2354 	if (err != 0) {
2355 		/* recover from error: reset the mtus of the ports */
2356 		aggr_port_t *tmp;
2357 
2358 		for (tmp = grp->lg_ports, i = 0; tmp != port;
2359 		    tmp = tmp->lp_next, i++) {
2360 			(void) aggr_set_port_sdu(grp, tmp, *(mtu + i), NULL);
2361 		}
2362 		goto bail;
2363 	}
2364 	grp->lg_max_sdu = aggr_grp_max_sdu(grp);
2365 	rv = mac_maxsdu_update(grp->lg_mh, grp->lg_max_sdu);
2366 	ASSERT(rv == 0);
2367 bail:
2368 	kmem_free(mtu, sizeof (uint32_t) * grp->lg_nports);
2369 	return (err);
2370 }
2371 
2372 /*
2373  * Callback functions for set/get of properties
2374  */
2375 /*ARGSUSED*/
2376 static int
2377 aggr_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
2378     uint_t pr_valsize, const void *pr_val)
2379 {
2380 	int 		err = ENOTSUP;
2381 	aggr_grp_t 	*grp = m_driver;
2382 
2383 	switch (pr_num) {
2384 	case MAC_PROP_MTU: {
2385 		uint32_t 	mtu;
2386 
2387 		if (pr_valsize < sizeof (mtu)) {
2388 			err = EINVAL;
2389 			break;
2390 		}
2391 		bcopy(pr_val, &mtu, sizeof (mtu));
2392 		err = aggr_sdu_update(grp, mtu);
2393 		break;
2394 	}
2395 	default:
2396 		break;
2397 	}
2398 	return (err);
2399 }
2400 
2401 int
2402 aggr_grp_possible_mtu_range(aggr_grp_t *grp, mac_propval_range_t *range)
2403 {
2404 	mac_propval_range_t		*vals;
2405 	mac_propval_uint32_range_t	*ur;
2406 	aggr_port_t			*port;
2407 	mac_perim_handle_t		mph;
2408 	mac_prop_t 			macprop;
2409 	uint_t 				perm, i;
2410 	uint32_t 			min = 0, max = (uint32_t)-1;
2411 	int 				err = 0;
2412 
2413 	ASSERT(MAC_PERIM_HELD(grp->lg_mh));
2414 
2415 	vals = kmem_alloc(sizeof (mac_propval_range_t) * grp->lg_nports,
2416 	    KM_SLEEP);
2417 	macprop.mp_id = MAC_PROP_MTU;
2418 	macprop.mp_name = "mtu";
2419 	macprop.mp_flags = MAC_PROP_POSSIBLE;
2420 
2421 	for (port = grp->lg_ports, i = 0; port != NULL;
2422 	    port = port->lp_next, i++) {
2423 		mac_perim_enter_by_mh(port->lp_mh, &mph);
2424 		err = mac_get_prop(port->lp_mh, &macprop, vals + i,
2425 		    sizeof (mac_propval_range_t), &perm);
2426 		mac_perim_exit(mph);
2427 		if (err != 0)
2428 			break;
2429 	}
2430 	/*
2431 	 * if any of the underlying ports does not support changing MTU then
2432 	 * just return ENOTSUP
2433 	 */
2434 	if (port != NULL) {
2435 		ASSERT(err != 0);
2436 		goto done;
2437 	}
2438 	range->mpr_count = 1;
2439 	range->mpr_type = MAC_PROPVAL_UINT32;
2440 	for (i = 0; i < grp->lg_nports; i++) {
2441 		ur = &((vals + i)->range_uint32[0]);
2442 		/*
2443 		 * Take max of the min, for range_min; that is the minimum
2444 		 * MTU value for an aggregation is the maximum of the
2445 		 * minimum values of all the underlying ports
2446 		 */
2447 		if (ur->mpur_min > min)
2448 			min = ur->mpur_min;
2449 		/* Take min of the max, for range_max */
2450 		if (ur->mpur_max < max)
2451 			max = ur->mpur_max;
2452 	}
2453 	range->range_uint32[0].mpur_min = min;
2454 	range->range_uint32[0].mpur_max = max;
2455 done:
2456 	kmem_free(vals, sizeof (mac_propval_range_t) * grp->lg_nports);
2457 	return (err);
2458 }
2459 
2460 /*ARGSUSED*/
2461 static int
2462 aggr_m_getprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
2463     uint_t pr_flags, uint_t pr_valsize, void *pr_val, uint_t *perm)
2464 {
2465 	mac_propval_range_t 	range;
2466 	int 			err = ENOTSUP;
2467 	aggr_grp_t		*grp = m_driver;
2468 
2469 	switch (pr_num) {
2470 	case MAC_PROP_MTU:
2471 		if (!(pr_flags & MAC_PROP_POSSIBLE))
2472 			return (ENOTSUP);
2473 		if (pr_valsize < sizeof (mac_propval_range_t))
2474 			return (EINVAL);
2475 		if ((err = aggr_grp_possible_mtu_range(grp, &range)) != 0)
2476 			return (err);
2477 		bcopy(&range, pr_val, sizeof (range));
2478 		return (0);
2479 	}
2480 	return (err);
2481 }
2482