17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5f12af565Snd99603 * Common Development and Distribution License (the "License"). 6f12af565Snd99603 * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 220591ddd0SPrakash Jalan * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 23*09b7f21aSRobert Mustacchi * Copyright 2015 Joyent, Inc. 247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate /* 277c478bd9Sstevel@tonic-gate * IEEE 802.3ad Link Aggregation -- Link Aggregation Groups. 287c478bd9Sstevel@tonic-gate * 297c478bd9Sstevel@tonic-gate * An instance of the structure aggr_grp_t is allocated for each 307c478bd9Sstevel@tonic-gate * link aggregation group. 
When created, aggr_grp_t objects are 31210db224Sericheng * entered into the aggr_grp_hash hash table maintained by the modhash 32d62bc4baSyz147064 * module. The hash key is the linkid associated with the link 33d62bc4baSyz147064 * aggregation group. 347c478bd9Sstevel@tonic-gate * 357c478bd9Sstevel@tonic-gate * A set of MAC ports is associated with each aggregation group. 360dc2366fSVenugopal Iyer * 370dc2366fSVenugopal Iyer * Aggr pseudo TX rings 380dc2366fSVenugopal Iyer * -------------------- 390dc2366fSVenugopal Iyer * The underlying ports (NICs) in an aggregation can have TX rings. To 400dc2366fSVenugopal Iyer * enhance aggr's performance, these TX rings are made available to the 410dc2366fSVenugopal Iyer * aggr layer as pseudo TX rings. The concept of pseudo rings is not new. 420dc2366fSVenugopal Iyer * They are already present and implemented on the RX side. They are called 430dc2366fSVenugopal Iyer * pseudo RX rings. The same concept is extended to the TX side where 440dc2366fSVenugopal Iyer * each TX ring of an underlying port is reflected in aggr as a pseudo 450dc2366fSVenugopal Iyer * TX ring. Thus each pseudo TX ring will map to a specific hardware TX 460dc2366fSVenugopal Iyer * ring. Even in the case of a NIC that does not have a TX ring, a pseudo 470dc2366fSVenugopal Iyer * TX ring is given to the aggregation layer. 480dc2366fSVenugopal Iyer * 490dc2366fSVenugopal Iyer * With this change, the outgoing stack depth looks much better: 500dc2366fSVenugopal Iyer * 510dc2366fSVenugopal Iyer * mac_tx() -> mac_tx_aggr_mode() -> mac_tx_soft_ring_process() -> 520dc2366fSVenugopal Iyer * mac_tx_send() -> aggr_ring_tx() -> <driver>_ring_tx() 530dc2366fSVenugopal Iyer * 540dc2366fSVenugopal Iyer * Two new modes are introduced to mac_tx() to handle aggr pseudo TX rings: 550dc2366fSVenugopal Iyer * SRS_TX_AGGR and SRS_TX_BW_AGGR. 560dc2366fSVenugopal Iyer * 570dc2366fSVenugopal Iyer * In SRS_TX_AGGR mode, mac_tx_aggr_mode() routine is called. 
This routine 580dc2366fSVenugopal Iyer * invokes an aggr function, aggr_find_tx_ring(), to find a (pseudo) TX 590dc2366fSVenugopal Iyer * ring belonging to a port on which the packet has to be sent. 600dc2366fSVenugopal Iyer * aggr_find_tx_ring() first finds the outgoing port based on L2/L3/L4 610dc2366fSVenugopal Iyer * policy and then uses the fanout_hint passed to it to pick a TX ring from 620dc2366fSVenugopal Iyer * the selected port. 630dc2366fSVenugopal Iyer * 640dc2366fSVenugopal Iyer * In SRS_TX_BW_AGGR mode, mac_tx_bw_mode() function is called where 650dc2366fSVenugopal Iyer * bandwidth limit is applied first on the outgoing packet and the packets 660dc2366fSVenugopal Iyer * allowed to go out would call mac_tx_aggr_mode() to send the packet on a 670dc2366fSVenugopal Iyer * particular TX ring. 687c478bd9Sstevel@tonic-gate */ 697c478bd9Sstevel@tonic-gate 707c478bd9Sstevel@tonic-gate #include <sys/types.h> 717c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 727c478bd9Sstevel@tonic-gate #include <sys/conf.h> 737c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 74da14cebeSEric Cheng #include <sys/disp.h> 757c478bd9Sstevel@tonic-gate #include <sys/list.h> 767c478bd9Sstevel@tonic-gate #include <sys/ksynch.h> 777c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 787c478bd9Sstevel@tonic-gate #include <sys/stream.h> 797c478bd9Sstevel@tonic-gate #include <sys/modctl.h> 807c478bd9Sstevel@tonic-gate #include <sys/ddi.h> 817c478bd9Sstevel@tonic-gate #include <sys/sunddi.h> 827c478bd9Sstevel@tonic-gate #include <sys/atomic.h> 837c478bd9Sstevel@tonic-gate #include <sys/stat.h> 84210db224Sericheng #include <sys/modhash.h> 85d62bc4baSyz147064 #include <sys/id_space.h> 867c478bd9Sstevel@tonic-gate #include <sys/strsun.h> 872b24ab6bSSebastien Roy #include <sys/cred.h> 887c478bd9Sstevel@tonic-gate #include <sys/dlpi.h> 892b24ab6bSSebastien Roy #include <sys/zone.h> 90da14cebeSEric Cheng #include <sys/mac_provider.h> 91d62bc4baSyz147064 #include <sys/dls.h> 
92d62bc4baSyz147064 #include <sys/vlan.h> 937c478bd9Sstevel@tonic-gate #include <sys/aggr.h> 947c478bd9Sstevel@tonic-gate #include <sys/aggr_impl.h> 957c478bd9Sstevel@tonic-gate 967c478bd9Sstevel@tonic-gate static int aggr_m_start(void *); 977c478bd9Sstevel@tonic-gate static void aggr_m_stop(void *); 987c478bd9Sstevel@tonic-gate static int aggr_m_promisc(void *, boolean_t); 997c478bd9Sstevel@tonic-gate static int aggr_m_multicst(void *, boolean_t, const uint8_t *); 1007c478bd9Sstevel@tonic-gate static int aggr_m_unicst(void *, const uint8_t *); 101ba2e4443Sseb static int aggr_m_stat(void *, uint_t, uint64_t *); 1027c478bd9Sstevel@tonic-gate static void aggr_m_ioctl(void *, queue_t *, mblk_t *); 103ba2e4443Sseb static boolean_t aggr_m_capab_get(void *, mac_capab_t, void *); 104986cab2cSGirish Moodalbail static int aggr_m_setprop(void *, const char *, mac_prop_id_t, uint_t, 105986cab2cSGirish Moodalbail const void *); 1060dc2366fSVenugopal Iyer static void aggr_m_propinfo(void *, const char *, mac_prop_id_t, 1070dc2366fSVenugopal Iyer mac_prop_info_handle_t); 108986cab2cSGirish Moodalbail 109d62bc4baSyz147064 static aggr_port_t *aggr_grp_port_lookup(aggr_grp_t *, datalink_id_t); 1104deae11aSyz147064 static int aggr_grp_rem_port(aggr_grp_t *, aggr_port_t *, boolean_t *, 1114deae11aSyz147064 boolean_t *); 112d62bc4baSyz147064 1137c478bd9Sstevel@tonic-gate static void aggr_grp_capab_set(aggr_grp_t *); 1147c478bd9Sstevel@tonic-gate static boolean_t aggr_grp_capab_check(aggr_grp_t *, aggr_port_t *); 115f4420ae7Snd99603 static uint_t aggr_grp_max_sdu(aggr_grp_t *); 116d62bc4baSyz147064 static uint32_t aggr_grp_max_margin(aggr_grp_t *); 117f4420ae7Snd99603 static boolean_t aggr_grp_sdu_check(aggr_grp_t *, aggr_port_t *); 118d62bc4baSyz147064 static boolean_t aggr_grp_margin_check(aggr_grp_t *, aggr_port_t *); 119da14cebeSEric Cheng 120da14cebeSEric Cheng static int aggr_add_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *); 121da14cebeSEric Cheng static void 
aggr_rem_pseudo_rx_group(aggr_port_t *, aggr_pseudo_rx_group_t *); 122da14cebeSEric Cheng static int aggr_pseudo_disable_intr(mac_intr_handle_t); 123da14cebeSEric Cheng static int aggr_pseudo_enable_intr(mac_intr_handle_t); 124da14cebeSEric Cheng static int aggr_pseudo_start_ring(mac_ring_driver_t, uint64_t); 125da14cebeSEric Cheng static void aggr_pseudo_stop_ring(mac_ring_driver_t); 126da14cebeSEric Cheng static int aggr_addmac(void *, const uint8_t *); 127da14cebeSEric Cheng static int aggr_remmac(void *, const uint8_t *); 128da14cebeSEric Cheng static mblk_t *aggr_rx_poll(void *, int); 129da14cebeSEric Cheng static void aggr_fill_ring(void *, mac_ring_type_t, const int, 130da14cebeSEric Cheng const int, mac_ring_info_t *, mac_ring_handle_t); 131da14cebeSEric Cheng static void aggr_fill_group(void *, mac_ring_type_t, const int, 132da14cebeSEric Cheng mac_group_info_t *, mac_group_handle_t); 1337c478bd9Sstevel@tonic-gate 1347c478bd9Sstevel@tonic-gate static kmem_cache_t *aggr_grp_cache; 135210db224Sericheng static mod_hash_t *aggr_grp_hash; 136210db224Sericheng static krwlock_t aggr_grp_lock; 137210db224Sericheng static uint_t aggr_grp_cnt; 138d62bc4baSyz147064 static id_space_t *key_ids; 1397c478bd9Sstevel@tonic-gate 1407c478bd9Sstevel@tonic-gate #define GRP_HASHSZ 64 141d62bc4baSyz147064 #define GRP_HASH_KEY(linkid) ((mod_hash_key_t)(uintptr_t)linkid) 142da14cebeSEric Cheng #define AGGR_PORT_NAME_DELIMIT '-' 1437c478bd9Sstevel@tonic-gate 1447c478bd9Sstevel@tonic-gate static uchar_t aggr_zero_mac[] = {0, 0, 0, 0, 0, 0}; 1457c478bd9Sstevel@tonic-gate 146986cab2cSGirish Moodalbail #define AGGR_M_CALLBACK_FLAGS \ 1470dc2366fSVenugopal Iyer (MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO) 148ba2e4443Sseb 149ba2e4443Sseb static mac_callbacks_t aggr_m_callbacks = { 150ba2e4443Sseb AGGR_M_CALLBACK_FLAGS, 151ba2e4443Sseb aggr_m_stat, 152ba2e4443Sseb aggr_m_start, 153ba2e4443Sseb aggr_m_stop, 154ba2e4443Sseb aggr_m_promisc, 155ba2e4443Sseb aggr_m_multicst, 
156da14cebeSEric Cheng NULL, 1570dc2366fSVenugopal Iyer NULL, 1580dc2366fSVenugopal Iyer NULL, 159ba2e4443Sseb aggr_m_ioctl, 160986cab2cSGirish Moodalbail aggr_m_capab_get, 161986cab2cSGirish Moodalbail NULL, 162986cab2cSGirish Moodalbail NULL, 163986cab2cSGirish Moodalbail aggr_m_setprop, 1640dc2366fSVenugopal Iyer NULL, 1650dc2366fSVenugopal Iyer aggr_m_propinfo 166ba2e4443Sseb }; 167ba2e4443Sseb 1687c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 1697c478bd9Sstevel@tonic-gate static int 1707c478bd9Sstevel@tonic-gate aggr_grp_constructor(void *buf, void *arg, int kmflag) 1717c478bd9Sstevel@tonic-gate { 1727c478bd9Sstevel@tonic-gate aggr_grp_t *grp = buf; 1737c478bd9Sstevel@tonic-gate 1747c478bd9Sstevel@tonic-gate bzero(grp, sizeof (*grp)); 175da14cebeSEric Cheng mutex_init(&grp->lg_lacp_lock, NULL, MUTEX_DEFAULT, NULL); 176da14cebeSEric Cheng cv_init(&grp->lg_lacp_cv, NULL, CV_DEFAULT, NULL); 177da14cebeSEric Cheng rw_init(&grp->lg_tx_lock, NULL, RW_DRIVER, NULL); 178da14cebeSEric Cheng mutex_init(&grp->lg_port_lock, NULL, MUTEX_DEFAULT, NULL); 179da14cebeSEric Cheng cv_init(&grp->lg_port_cv, NULL, CV_DEFAULT, NULL); 1800dc2366fSVenugopal Iyer mutex_init(&grp->lg_tx_flowctl_lock, NULL, MUTEX_DEFAULT, NULL); 1810dc2366fSVenugopal Iyer cv_init(&grp->lg_tx_flowctl_cv, NULL, CV_DEFAULT, NULL); 1827c478bd9Sstevel@tonic-gate grp->lg_link_state = LINK_STATE_UNKNOWN; 1837c478bd9Sstevel@tonic-gate return (0); 1847c478bd9Sstevel@tonic-gate } 1857c478bd9Sstevel@tonic-gate 1867c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 1877c478bd9Sstevel@tonic-gate static void 1887c478bd9Sstevel@tonic-gate aggr_grp_destructor(void *buf, void *arg) 1897c478bd9Sstevel@tonic-gate { 1907c478bd9Sstevel@tonic-gate aggr_grp_t *grp = buf; 1917c478bd9Sstevel@tonic-gate 1927c478bd9Sstevel@tonic-gate if (grp->lg_tx_ports != NULL) { 1937c478bd9Sstevel@tonic-gate kmem_free(grp->lg_tx_ports, 1947c478bd9Sstevel@tonic-gate grp->lg_tx_ports_size * sizeof (aggr_port_t *)); 1957c478bd9Sstevel@tonic-gate } 
1967c478bd9Sstevel@tonic-gate 197da14cebeSEric Cheng mutex_destroy(&grp->lg_lacp_lock); 198da14cebeSEric Cheng cv_destroy(&grp->lg_lacp_cv); 199da14cebeSEric Cheng mutex_destroy(&grp->lg_port_lock); 200da14cebeSEric Cheng cv_destroy(&grp->lg_port_cv); 201da14cebeSEric Cheng rw_destroy(&grp->lg_tx_lock); 2020dc2366fSVenugopal Iyer mutex_destroy(&grp->lg_tx_flowctl_lock); 2030dc2366fSVenugopal Iyer cv_destroy(&grp->lg_tx_flowctl_cv); 2047c478bd9Sstevel@tonic-gate } 2057c478bd9Sstevel@tonic-gate 2067c478bd9Sstevel@tonic-gate void 2077c478bd9Sstevel@tonic-gate aggr_grp_init(void) 2087c478bd9Sstevel@tonic-gate { 2097c478bd9Sstevel@tonic-gate aggr_grp_cache = kmem_cache_create("aggr_grp_cache", 2107c478bd9Sstevel@tonic-gate sizeof (aggr_grp_t), 0, aggr_grp_constructor, 2117c478bd9Sstevel@tonic-gate aggr_grp_destructor, NULL, NULL, NULL, 0); 2127c478bd9Sstevel@tonic-gate 213210db224Sericheng aggr_grp_hash = mod_hash_create_idhash("aggr_grp_hash", 214210db224Sericheng GRP_HASHSZ, mod_hash_null_valdtor); 215210db224Sericheng rw_init(&aggr_grp_lock, NULL, RW_DEFAULT, NULL); 216210db224Sericheng aggr_grp_cnt = 0; 217d62bc4baSyz147064 218d62bc4baSyz147064 /* 219d62bc4baSyz147064 * Allocate an id space to manage key values (when key is not 220d62bc4baSyz147064 * specified). The range of the id space will be from 221d62bc4baSyz147064 * (AGGR_MAX_KEY + 1) to UINT16_MAX, because the LACP protocol 222d62bc4baSyz147064 * uses a 16-bit key. 
223d62bc4baSyz147064 */ 224d62bc4baSyz147064 key_ids = id_space_create("aggr_key_ids", AGGR_MAX_KEY + 1, UINT16_MAX); 225d62bc4baSyz147064 ASSERT(key_ids != NULL); 2267c478bd9Sstevel@tonic-gate } 2277c478bd9Sstevel@tonic-gate 228c0192a57Sericheng void 2297c478bd9Sstevel@tonic-gate aggr_grp_fini(void) 2307c478bd9Sstevel@tonic-gate { 231d62bc4baSyz147064 id_space_destroy(key_ids); 232210db224Sericheng rw_destroy(&aggr_grp_lock); 233210db224Sericheng mod_hash_destroy_idhash(aggr_grp_hash); 2347c478bd9Sstevel@tonic-gate kmem_cache_destroy(aggr_grp_cache); 2357c478bd9Sstevel@tonic-gate } 2367c478bd9Sstevel@tonic-gate 237210db224Sericheng uint_t 238210db224Sericheng aggr_grp_count(void) 239210db224Sericheng { 240210db224Sericheng uint_t count; 241210db224Sericheng 242210db224Sericheng rw_enter(&aggr_grp_lock, RW_READER); 243210db224Sericheng count = aggr_grp_cnt; 244210db224Sericheng rw_exit(&aggr_grp_lock); 245210db224Sericheng return (count); 246210db224Sericheng } 247210db224Sericheng 2487c478bd9Sstevel@tonic-gate /* 249da14cebeSEric Cheng * Since both aggr_port_notify_cb() and aggr_port_timer_thread() functions 250da14cebeSEric Cheng * requires the mac perimeter, this function holds a reference of the aggr 251da14cebeSEric Cheng * and aggr won't call mac_unregister() until this reference drops to 0. 252da14cebeSEric Cheng */ 253da14cebeSEric Cheng void 254da14cebeSEric Cheng aggr_grp_port_hold(aggr_port_t *port) 255da14cebeSEric Cheng { 256da14cebeSEric Cheng aggr_grp_t *grp = port->lp_grp; 257da14cebeSEric Cheng 258da14cebeSEric Cheng AGGR_PORT_REFHOLD(port); 259da14cebeSEric Cheng mutex_enter(&grp->lg_port_lock); 260da14cebeSEric Cheng grp->lg_port_ref++; 261da14cebeSEric Cheng mutex_exit(&grp->lg_port_lock); 262da14cebeSEric Cheng } 263da14cebeSEric Cheng 264da14cebeSEric Cheng /* 265da14cebeSEric Cheng * Release the reference of the grp and inform aggr_grp_delete() calling 266da14cebeSEric Cheng * mac_unregister() is now safe. 
267da14cebeSEric Cheng */ 268da14cebeSEric Cheng void 269da14cebeSEric Cheng aggr_grp_port_rele(aggr_port_t *port) 270da14cebeSEric Cheng { 271da14cebeSEric Cheng aggr_grp_t *grp = port->lp_grp; 272da14cebeSEric Cheng 273da14cebeSEric Cheng mutex_enter(&grp->lg_port_lock); 274da14cebeSEric Cheng if (--grp->lg_port_ref == 0) 275da14cebeSEric Cheng cv_signal(&grp->lg_port_cv); 276da14cebeSEric Cheng mutex_exit(&grp->lg_port_lock); 277da14cebeSEric Cheng AGGR_PORT_REFRELE(port); 278da14cebeSEric Cheng } 279da14cebeSEric Cheng 280da14cebeSEric Cheng /* 281da14cebeSEric Cheng * Wait for the port's lacp timer thread and the port's notification callback 282da14cebeSEric Cheng * to exit. 283da14cebeSEric Cheng */ 284da14cebeSEric Cheng void 285da14cebeSEric Cheng aggr_grp_port_wait(aggr_grp_t *grp) 286da14cebeSEric Cheng { 287da14cebeSEric Cheng mutex_enter(&grp->lg_port_lock); 288da14cebeSEric Cheng if (grp->lg_port_ref != 0) 289da14cebeSEric Cheng cv_wait(&grp->lg_port_cv, &grp->lg_port_lock); 290da14cebeSEric Cheng mutex_exit(&grp->lg_port_lock); 291da14cebeSEric Cheng } 292da14cebeSEric Cheng 293da14cebeSEric Cheng /* 2947c478bd9Sstevel@tonic-gate * Attach a port to a link aggregation group. 2957c478bd9Sstevel@tonic-gate * 2967c478bd9Sstevel@tonic-gate * A port is attached to a link aggregation group once its speed 2977c478bd9Sstevel@tonic-gate * and link state have been verified. 2987c478bd9Sstevel@tonic-gate * 2997c478bd9Sstevel@tonic-gate * Returns B_TRUE if the group link state or speed has changed. If 3007c478bd9Sstevel@tonic-gate * it's the case, the caller must notify the MAC layer via a call 3017c478bd9Sstevel@tonic-gate * to mac_link(). 
3027c478bd9Sstevel@tonic-gate */ 3037c478bd9Sstevel@tonic-gate boolean_t 3047c478bd9Sstevel@tonic-gate aggr_grp_attach_port(aggr_grp_t *grp, aggr_port_t *port) 3057c478bd9Sstevel@tonic-gate { 3064deae11aSyz147064 boolean_t link_state_changed = B_FALSE; 3077c478bd9Sstevel@tonic-gate 308da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 309da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD(port->lp_mh)); 3107c478bd9Sstevel@tonic-gate 3117c478bd9Sstevel@tonic-gate if (port->lp_state == AGGR_PORT_STATE_ATTACHED) 3127c478bd9Sstevel@tonic-gate return (B_FALSE); 3137c478bd9Sstevel@tonic-gate 3147c478bd9Sstevel@tonic-gate /* 3157c478bd9Sstevel@tonic-gate * Validate the MAC port link speed and update the group 3167c478bd9Sstevel@tonic-gate * link speed if needed. 3177c478bd9Sstevel@tonic-gate */ 3187c478bd9Sstevel@tonic-gate if (port->lp_ifspeed == 0 || 3197c478bd9Sstevel@tonic-gate port->lp_link_state != LINK_STATE_UP || 3207c478bd9Sstevel@tonic-gate port->lp_link_duplex != LINK_DUPLEX_FULL) { 3217c478bd9Sstevel@tonic-gate /* 3227c478bd9Sstevel@tonic-gate * Can't attach a MAC port with unknown link speed, 3237c478bd9Sstevel@tonic-gate * down link, or not in full duplex mode. 3247c478bd9Sstevel@tonic-gate */ 3257c478bd9Sstevel@tonic-gate return (B_FALSE); 3267c478bd9Sstevel@tonic-gate } 3277c478bd9Sstevel@tonic-gate 3287c478bd9Sstevel@tonic-gate if (grp->lg_ifspeed == 0) { 3297c478bd9Sstevel@tonic-gate /* 3307c478bd9Sstevel@tonic-gate * The group inherits the speed of the first link being 3317c478bd9Sstevel@tonic-gate * attached. 3327c478bd9Sstevel@tonic-gate */ 3337c478bd9Sstevel@tonic-gate grp->lg_ifspeed = port->lp_ifspeed; 3344deae11aSyz147064 link_state_changed = B_TRUE; 3357c478bd9Sstevel@tonic-gate } else if (grp->lg_ifspeed != port->lp_ifspeed) { 3367c478bd9Sstevel@tonic-gate /* 3377c478bd9Sstevel@tonic-gate * The link speed of the MAC port must be the same as 3387c478bd9Sstevel@tonic-gate * the group link speed, as per 802.3ad. 
Since it is 3397c478bd9Sstevel@tonic-gate * not, the attach is cancelled. 3407c478bd9Sstevel@tonic-gate */ 3417c478bd9Sstevel@tonic-gate return (B_FALSE); 3427c478bd9Sstevel@tonic-gate } 3437c478bd9Sstevel@tonic-gate 3447c478bd9Sstevel@tonic-gate grp->lg_nattached_ports++; 3457c478bd9Sstevel@tonic-gate 3467c478bd9Sstevel@tonic-gate /* 3477c478bd9Sstevel@tonic-gate * Update the group link state. 3487c478bd9Sstevel@tonic-gate */ 3497c478bd9Sstevel@tonic-gate if (grp->lg_link_state != LINK_STATE_UP) { 3507c478bd9Sstevel@tonic-gate grp->lg_link_state = LINK_STATE_UP; 3517c478bd9Sstevel@tonic-gate grp->lg_link_duplex = LINK_DUPLEX_FULL; 3524deae11aSyz147064 link_state_changed = B_TRUE; 3537c478bd9Sstevel@tonic-gate } 3547c478bd9Sstevel@tonic-gate 3557c478bd9Sstevel@tonic-gate /* 3567c478bd9Sstevel@tonic-gate * Update port's state. 3577c478bd9Sstevel@tonic-gate */ 3587c478bd9Sstevel@tonic-gate port->lp_state = AGGR_PORT_STATE_ATTACHED; 3597c478bd9Sstevel@tonic-gate 360ae6aa22aSVenugopal Iyer aggr_grp_multicst_port(port, B_TRUE); 361ae6aa22aSVenugopal Iyer 3627c478bd9Sstevel@tonic-gate /* 363490ed22dSyz147064 * Set port's receive callback 364490ed22dSyz147064 */ 365da14cebeSEric Cheng mac_rx_set(port->lp_mch, aggr_recv_cb, port); 366490ed22dSyz147064 367490ed22dSyz147064 /* 3687c478bd9Sstevel@tonic-gate * If LACP is OFF, the port can be used to send data as soon 3697c478bd9Sstevel@tonic-gate * as its link is up and verified to be compatible with the 3707c478bd9Sstevel@tonic-gate * aggregation. 3717c478bd9Sstevel@tonic-gate * 3727c478bd9Sstevel@tonic-gate * If LACP is active or passive, notify the LACP subsystem, which 3737c478bd9Sstevel@tonic-gate * will enable sending on the port following the LACP protocol. 
3747c478bd9Sstevel@tonic-gate */ 3757c478bd9Sstevel@tonic-gate if (grp->lg_lacp_mode == AGGR_LACP_OFF) 3767c478bd9Sstevel@tonic-gate aggr_send_port_enable(port); 3777c478bd9Sstevel@tonic-gate else 3787c478bd9Sstevel@tonic-gate aggr_lacp_port_attached(port); 3797c478bd9Sstevel@tonic-gate 3804deae11aSyz147064 return (link_state_changed); 3817c478bd9Sstevel@tonic-gate } 3827c478bd9Sstevel@tonic-gate 3837c478bd9Sstevel@tonic-gate boolean_t 384da14cebeSEric Cheng aggr_grp_detach_port(aggr_grp_t *grp, aggr_port_t *port) 3857c478bd9Sstevel@tonic-gate { 3864deae11aSyz147064 boolean_t link_state_changed = B_FALSE; 3877c478bd9Sstevel@tonic-gate 388da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 389da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD(port->lp_mh)); 3907c478bd9Sstevel@tonic-gate 391da14cebeSEric Cheng /* update state */ 3927c478bd9Sstevel@tonic-gate if (port->lp_state != AGGR_PORT_STATE_ATTACHED) 3937c478bd9Sstevel@tonic-gate return (B_FALSE); 394490ed22dSyz147064 395da14cebeSEric Cheng mac_rx_clear(port->lp_mch); 3967c478bd9Sstevel@tonic-gate 3977c478bd9Sstevel@tonic-gate aggr_grp_multicst_port(port, B_FALSE); 3987c478bd9Sstevel@tonic-gate 3997c478bd9Sstevel@tonic-gate if (grp->lg_lacp_mode == AGGR_LACP_OFF) 4007c478bd9Sstevel@tonic-gate aggr_send_port_disable(port); 401da14cebeSEric Cheng else 4027c478bd9Sstevel@tonic-gate aggr_lacp_port_detached(port); 4037c478bd9Sstevel@tonic-gate 40495c1c84bSRamesh Kumar Katla port->lp_state = AGGR_PORT_STATE_STANDBY; 405da14cebeSEric Cheng 4067c478bd9Sstevel@tonic-gate grp->lg_nattached_ports--; 4077c478bd9Sstevel@tonic-gate if (grp->lg_nattached_ports == 0) { 4087c478bd9Sstevel@tonic-gate /* the last attached MAC port of the group is being detached */ 4097c478bd9Sstevel@tonic-gate grp->lg_ifspeed = 0; 4107c478bd9Sstevel@tonic-gate grp->lg_link_state = LINK_STATE_DOWN; 4117c478bd9Sstevel@tonic-gate grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN; 4124deae11aSyz147064 link_state_changed = B_TRUE; 4137c478bd9Sstevel@tonic-gate } 
4147c478bd9Sstevel@tonic-gate 4154deae11aSyz147064 return (link_state_changed); 4167c478bd9Sstevel@tonic-gate } 4177c478bd9Sstevel@tonic-gate 4187c478bd9Sstevel@tonic-gate /* 4197c478bd9Sstevel@tonic-gate * Update the MAC addresses of the constituent ports of the specified 4207c478bd9Sstevel@tonic-gate * group. This function is invoked: 4217c478bd9Sstevel@tonic-gate * - after creating a new aggregation group. 4227c478bd9Sstevel@tonic-gate * - after adding new ports to an aggregation group. 4237c478bd9Sstevel@tonic-gate * - after removing a port from a group when the MAC address of 4247c478bd9Sstevel@tonic-gate * that port was used for the MAC address of the group. 4257c478bd9Sstevel@tonic-gate * - after the MAC address of a port changed when the MAC address 4267c478bd9Sstevel@tonic-gate * of that port was used for the MAC address of the group. 4274deae11aSyz147064 * 4284deae11aSyz147064 * Return true if the link state of the aggregation changed, for example 4294deae11aSyz147064 * as a result of a failure changing the MAC address of one of the 4304deae11aSyz147064 * constituent ports. 
4317c478bd9Sstevel@tonic-gate */ 4324deae11aSyz147064 boolean_t 4337c478bd9Sstevel@tonic-gate aggr_grp_update_ports_mac(aggr_grp_t *grp) 4347c478bd9Sstevel@tonic-gate { 4357c478bd9Sstevel@tonic-gate aggr_port_t *cport; 4364deae11aSyz147064 boolean_t link_state_changed = B_FALSE; 437da14cebeSEric Cheng mac_perim_handle_t mph; 4387c478bd9Sstevel@tonic-gate 439da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 4404deae11aSyz147064 4417c478bd9Sstevel@tonic-gate for (cport = grp->lg_ports; cport != NULL; 4427c478bd9Sstevel@tonic-gate cport = cport->lp_next) { 443da14cebeSEric Cheng mac_perim_enter_by_mh(cport->lp_mh, &mph); 444da14cebeSEric Cheng if (aggr_port_unicst(cport) != 0) { 445da14cebeSEric Cheng if (aggr_grp_detach_port(grp, cport)) 446392b1d6eSyz147064 link_state_changed = B_TRUE; 4474deae11aSyz147064 } else { 4484deae11aSyz147064 /* 4494deae11aSyz147064 * If a port was detached because of a previous 4504deae11aSyz147064 * failure changing the MAC address, the port is 4514deae11aSyz147064 * reattached when it successfully changes the MAC 4524deae11aSyz147064 * address now, and this might cause the link state 4534deae11aSyz147064 * of the aggregation to change. 4544deae11aSyz147064 */ 455392b1d6eSyz147064 if (aggr_grp_attach_port(grp, cport)) 456392b1d6eSyz147064 link_state_changed = B_TRUE; 4577c478bd9Sstevel@tonic-gate } 458da14cebeSEric Cheng mac_perim_exit(mph); 4594deae11aSyz147064 } 4604deae11aSyz147064 return (link_state_changed); 4617c478bd9Sstevel@tonic-gate } 4627c478bd9Sstevel@tonic-gate 4637c478bd9Sstevel@tonic-gate /* 4647c478bd9Sstevel@tonic-gate * Invoked when the MAC address of a port has changed. If the port's 4654deae11aSyz147064 * MAC address was used for the group MAC address, set mac_addr_changedp 4664deae11aSyz147064 * to B_TRUE to indicate to the caller that it should send a MAC_NOTE_UNICST 4674deae11aSyz147064 * notification. 
If the link state changes due to detach/attach of 4684deae11aSyz147064 * the constituent port, set link_state_changedp to B_TRUE to indicate 4694deae11aSyz147064 * to the caller that it should send a MAC_NOTE_LINK notification. In both 4704deae11aSyz147064 * cases, it is the responsibility of the caller to invoke notification 4714deae11aSyz147064 * functions after releasing the the port lock. 4727c478bd9Sstevel@tonic-gate */ 4734deae11aSyz147064 void 4744deae11aSyz147064 aggr_grp_port_mac_changed(aggr_grp_t *grp, aggr_port_t *port, 4754deae11aSyz147064 boolean_t *mac_addr_changedp, boolean_t *link_state_changedp) 4767c478bd9Sstevel@tonic-gate { 477da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 478da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD(port->lp_mh)); 4794deae11aSyz147064 ASSERT(mac_addr_changedp != NULL); 4804deae11aSyz147064 ASSERT(link_state_changedp != NULL); 4814deae11aSyz147064 4824deae11aSyz147064 *mac_addr_changedp = B_FALSE; 4834deae11aSyz147064 *link_state_changedp = B_FALSE; 4847c478bd9Sstevel@tonic-gate 4857c478bd9Sstevel@tonic-gate if (grp->lg_addr_fixed) { 4867c478bd9Sstevel@tonic-gate /* 4877c478bd9Sstevel@tonic-gate * The group is using a fixed MAC address or an automatic 4887c478bd9Sstevel@tonic-gate * MAC address has not been set. 4897c478bd9Sstevel@tonic-gate */ 4904deae11aSyz147064 return; 4917c478bd9Sstevel@tonic-gate } 4927c478bd9Sstevel@tonic-gate 4937c478bd9Sstevel@tonic-gate if (grp->lg_mac_addr_port == port) { 4947c478bd9Sstevel@tonic-gate /* 4957c478bd9Sstevel@tonic-gate * The MAC address of the port was assigned to the group 4967c478bd9Sstevel@tonic-gate * MAC address. Update the group MAC address. 
4977c478bd9Sstevel@tonic-gate */ 4987c478bd9Sstevel@tonic-gate bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL); 4994deae11aSyz147064 *mac_addr_changedp = B_TRUE; 5007c478bd9Sstevel@tonic-gate } else { 5017c478bd9Sstevel@tonic-gate /* 5027c478bd9Sstevel@tonic-gate * Update the actual port MAC address to the MAC address 5037c478bd9Sstevel@tonic-gate * of the group. 5047c478bd9Sstevel@tonic-gate */ 505da14cebeSEric Cheng if (aggr_port_unicst(port) != 0) { 506da14cebeSEric Cheng *link_state_changedp = aggr_grp_detach_port(grp, port); 5074deae11aSyz147064 } else { 5084deae11aSyz147064 /* 5094deae11aSyz147064 * If a port was detached because of a previous 5104deae11aSyz147064 * failure changing the MAC address, the port is 5114deae11aSyz147064 * reattached when it successfully changes the MAC 5124deae11aSyz147064 * address now, and this might cause the link state 5134deae11aSyz147064 * of the aggregation to change. 5144deae11aSyz147064 */ 5154deae11aSyz147064 *link_state_changedp = aggr_grp_attach_port(grp, port); 5167c478bd9Sstevel@tonic-gate } 5174deae11aSyz147064 } 5187c478bd9Sstevel@tonic-gate } 5197c478bd9Sstevel@tonic-gate 5207c478bd9Sstevel@tonic-gate /* 5217c478bd9Sstevel@tonic-gate * Add a port to a link aggregation group. 5227c478bd9Sstevel@tonic-gate */ 5237c478bd9Sstevel@tonic-gate static int 524da14cebeSEric Cheng aggr_grp_add_port(aggr_grp_t *grp, datalink_id_t port_linkid, boolean_t force, 525d62bc4baSyz147064 aggr_port_t **pp) 5267c478bd9Sstevel@tonic-gate { 5277c478bd9Sstevel@tonic-gate aggr_port_t *port, **cport; 528da14cebeSEric Cheng mac_perim_handle_t mph; 5292b24ab6bSSebastien Roy zoneid_t port_zoneid = ALL_ZONES; 5307c478bd9Sstevel@tonic-gate int err; 5317c478bd9Sstevel@tonic-gate 5322b24ab6bSSebastien Roy /* The port must be int the same zone as the aggregation. 
*/ 5332b24ab6bSSebastien Roy if (zone_check_datalink(&port_zoneid, port_linkid) != 0) 5342b24ab6bSSebastien Roy port_zoneid = GLOBAL_ZONEID; 5352b24ab6bSSebastien Roy if (grp->lg_zoneid != port_zoneid) 5362b24ab6bSSebastien Roy return (EBUSY); 5372b24ab6bSSebastien Roy 538da14cebeSEric Cheng /* 539da14cebeSEric Cheng * lg_mh could be NULL when the function is called during the creation 540da14cebeSEric Cheng * of the aggregation. 541da14cebeSEric Cheng */ 542da14cebeSEric Cheng ASSERT(grp->lg_mh == NULL || MAC_PERIM_HELD(grp->lg_mh)); 5437c478bd9Sstevel@tonic-gate 5447c478bd9Sstevel@tonic-gate /* create new port */ 545da14cebeSEric Cheng err = aggr_port_create(grp, port_linkid, force, &port); 5467c478bd9Sstevel@tonic-gate if (err != 0) 5477c478bd9Sstevel@tonic-gate return (err); 5487c478bd9Sstevel@tonic-gate 549da14cebeSEric Cheng mac_perim_enter_by_mh(port->lp_mh, &mph); 5507c478bd9Sstevel@tonic-gate 5517c478bd9Sstevel@tonic-gate /* add port to list of group constituent ports */ 5527c478bd9Sstevel@tonic-gate cport = &grp->lg_ports; 5537c478bd9Sstevel@tonic-gate while (*cport != NULL) 5547c478bd9Sstevel@tonic-gate cport = &((*cport)->lp_next); 5557c478bd9Sstevel@tonic-gate *cport = port; 5567c478bd9Sstevel@tonic-gate 5577c478bd9Sstevel@tonic-gate /* 5587c478bd9Sstevel@tonic-gate * Back reference to the group it is member of. A port always 5597c478bd9Sstevel@tonic-gate * holds a reference to its group to ensure that the back 5607c478bd9Sstevel@tonic-gate * reference is always valid. 
5617c478bd9Sstevel@tonic-gate */ 5627c478bd9Sstevel@tonic-gate port->lp_grp = grp; 5637c478bd9Sstevel@tonic-gate AGGR_GRP_REFHOLD(grp); 5647c478bd9Sstevel@tonic-gate grp->lg_nports++; 5657c478bd9Sstevel@tonic-gate 5667c478bd9Sstevel@tonic-gate aggr_lacp_init_port(port); 567da14cebeSEric Cheng mac_perim_exit(mph); 5687c478bd9Sstevel@tonic-gate 5697c478bd9Sstevel@tonic-gate if (pp != NULL) 5707c478bd9Sstevel@tonic-gate *pp = port; 5717c478bd9Sstevel@tonic-gate 5727c478bd9Sstevel@tonic-gate return (0); 5737c478bd9Sstevel@tonic-gate } 5747c478bd9Sstevel@tonic-gate 5757c478bd9Sstevel@tonic-gate /* 576*09b7f21aSRobert Mustacchi * This is called in response to either our LACP state machine or a MAC 577*09b7f21aSRobert Mustacchi * notification that the link has gone down via aggr_send_port_disable(). At 578*09b7f21aSRobert Mustacchi * this point, we may need to update our default ring. To that end, we go 579*09b7f21aSRobert Mustacchi * through the set of ports (underlying datalinks in an aggregation) that are 580*09b7f21aSRobert Mustacchi * currently enabled to transmit data. If all our links have been disabled for 581*09b7f21aSRobert Mustacchi * transmit, then we don't do anything. 582*09b7f21aSRobert Mustacchi * 583*09b7f21aSRobert Mustacchi * Note, because we only have a single TX group, we don't have to worry about 584*09b7f21aSRobert Mustacchi * the rings moving between groups and the chance that mac will reassign it 585*09b7f21aSRobert Mustacchi * unless someone removes a port, at which point, we play it safe and call this 586*09b7f21aSRobert Mustacchi * again. 
587*09b7f21aSRobert Mustacchi */ 588*09b7f21aSRobert Mustacchi void 589*09b7f21aSRobert Mustacchi aggr_grp_update_default(aggr_grp_t *grp) 590*09b7f21aSRobert Mustacchi { 591*09b7f21aSRobert Mustacchi aggr_port_t *port; 592*09b7f21aSRobert Mustacchi ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 593*09b7f21aSRobert Mustacchi 594*09b7f21aSRobert Mustacchi rw_enter(&grp->lg_tx_lock, RW_WRITER); 595*09b7f21aSRobert Mustacchi 596*09b7f21aSRobert Mustacchi if (grp->lg_ntx_ports == 0) { 597*09b7f21aSRobert Mustacchi rw_exit(&grp->lg_tx_lock); 598*09b7f21aSRobert Mustacchi return; 599*09b7f21aSRobert Mustacchi } 600*09b7f21aSRobert Mustacchi 601*09b7f21aSRobert Mustacchi port = grp->lg_tx_ports[0]; 602*09b7f21aSRobert Mustacchi ASSERT(port->lp_tx_ring_cnt > 0); 603*09b7f21aSRobert Mustacchi mac_hwring_set_default(grp->lg_mh, port->lp_pseudo_tx_rings[0]); 604*09b7f21aSRobert Mustacchi rw_exit(&grp->lg_tx_lock); 605*09b7f21aSRobert Mustacchi } 606*09b7f21aSRobert Mustacchi 607*09b7f21aSRobert Mustacchi /* 6080dc2366fSVenugopal Iyer * Add a pseudo RX ring for the given HW ring handle. 609da14cebeSEric Cheng */ 610da14cebeSEric Cheng static int 611da14cebeSEric Cheng aggr_add_pseudo_rx_ring(aggr_port_t *port, 612da14cebeSEric Cheng aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh) 613da14cebeSEric Cheng { 614da14cebeSEric Cheng aggr_pseudo_rx_ring_t *ring; 615da14cebeSEric Cheng int err; 616da14cebeSEric Cheng int j; 617da14cebeSEric Cheng 618da14cebeSEric Cheng for (j = 0; j < MAX_RINGS_PER_GROUP; j++) { 619da14cebeSEric Cheng ring = rx_grp->arg_rings + j; 620da14cebeSEric Cheng if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE)) 621da14cebeSEric Cheng break; 622da14cebeSEric Cheng } 623da14cebeSEric Cheng 624da14cebeSEric Cheng /* 6250dc2366fSVenugopal Iyer * No slot for this new RX ring. 
626da14cebeSEric Cheng */ 627da14cebeSEric Cheng if (j == MAX_RINGS_PER_GROUP) 628da14cebeSEric Cheng return (EIO); 629da14cebeSEric Cheng 630da14cebeSEric Cheng ring->arr_flags |= MAC_PSEUDO_RING_INUSE; 631da14cebeSEric Cheng ring->arr_hw_rh = hw_rh; 632da14cebeSEric Cheng ring->arr_port = port; 633da14cebeSEric Cheng rx_grp->arg_ring_cnt++; 634da14cebeSEric Cheng 635da14cebeSEric Cheng /* 636da14cebeSEric Cheng * The group is already registered, dynamically add a new ring to the 637da14cebeSEric Cheng * mac group. 638da14cebeSEric Cheng */ 639da14cebeSEric Cheng if ((err = mac_group_add_ring(rx_grp->arg_gh, j)) != 0) { 640da14cebeSEric Cheng ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE; 641da14cebeSEric Cheng ring->arr_hw_rh = NULL; 642da14cebeSEric Cheng ring->arr_port = NULL; 643da14cebeSEric Cheng rx_grp->arg_ring_cnt--; 6440dc2366fSVenugopal Iyer } else { 6450dc2366fSVenugopal Iyer mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring, 6460dc2366fSVenugopal Iyer mac_find_ring(rx_grp->arg_gh, j)); 647da14cebeSEric Cheng } 648da14cebeSEric Cheng return (err); 649da14cebeSEric Cheng } 650da14cebeSEric Cheng 651da14cebeSEric Cheng /* 6520dc2366fSVenugopal Iyer * Remove the pseudo RX ring of the given HW ring handle. 
653da14cebeSEric Cheng */ 654da14cebeSEric Cheng static void 655da14cebeSEric Cheng aggr_rem_pseudo_rx_ring(aggr_pseudo_rx_group_t *rx_grp, mac_ring_handle_t hw_rh) 656da14cebeSEric Cheng { 657da14cebeSEric Cheng aggr_pseudo_rx_ring_t *ring; 658da14cebeSEric Cheng int j; 659da14cebeSEric Cheng 660da14cebeSEric Cheng for (j = 0; j < MAX_RINGS_PER_GROUP; j++) { 661da14cebeSEric Cheng ring = rx_grp->arg_rings + j; 662da14cebeSEric Cheng if (!(ring->arr_flags & MAC_PSEUDO_RING_INUSE) || 663da14cebeSEric Cheng ring->arr_hw_rh != hw_rh) { 664da14cebeSEric Cheng continue; 665da14cebeSEric Cheng } 666da14cebeSEric Cheng 667da14cebeSEric Cheng mac_group_rem_ring(rx_grp->arg_gh, ring->arr_rh); 668da14cebeSEric Cheng 669da14cebeSEric Cheng ring->arr_flags &= ~MAC_PSEUDO_RING_INUSE; 670da14cebeSEric Cheng ring->arr_hw_rh = NULL; 671da14cebeSEric Cheng ring->arr_port = NULL; 672da14cebeSEric Cheng rx_grp->arg_ring_cnt--; 673da14cebeSEric Cheng mac_hwring_teardown(hw_rh); 674da14cebeSEric Cheng break; 675da14cebeSEric Cheng } 676da14cebeSEric Cheng } 677da14cebeSEric Cheng 678da14cebeSEric Cheng /* 679da14cebeSEric Cheng * This function is called to create pseudo rings over the hardware rings of 680da14cebeSEric Cheng * the underlying device. Note that there is a 1:1 mapping between the pseudo 681da14cebeSEric Cheng * RX rings of the aggr and the hardware rings of the underlying port. 
682da14cebeSEric Cheng */ 683da14cebeSEric Cheng static int 684da14cebeSEric Cheng aggr_add_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) 685da14cebeSEric Cheng { 686da14cebeSEric Cheng aggr_grp_t *grp = port->lp_grp; 687da14cebeSEric Cheng mac_ring_handle_t hw_rh[MAX_RINGS_PER_GROUP]; 688da14cebeSEric Cheng aggr_unicst_addr_t *addr, *a; 689da14cebeSEric Cheng mac_perim_handle_t pmph; 690da14cebeSEric Cheng int hw_rh_cnt, i = 0, j; 691da14cebeSEric Cheng int err = 0; 692da14cebeSEric Cheng 693da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 694da14cebeSEric Cheng mac_perim_enter_by_mh(port->lp_mh, &pmph); 695da14cebeSEric Cheng 696da14cebeSEric Cheng /* 697da14cebeSEric Cheng * This function must be called after the aggr registers its mac 698da14cebeSEric Cheng * and its RX group has been initialized. 699da14cebeSEric Cheng */ 700da14cebeSEric Cheng ASSERT(rx_grp->arg_gh != NULL); 701da14cebeSEric Cheng 702da14cebeSEric Cheng /* 703da14cebeSEric Cheng * Get the list the the underlying HW rings. 704da14cebeSEric Cheng */ 7050dc2366fSVenugopal Iyer hw_rh_cnt = mac_hwrings_get(port->lp_mch, 7060dc2366fSVenugopal Iyer &port->lp_hwgh, hw_rh, MAC_RING_TYPE_RX); 707da14cebeSEric Cheng 708da14cebeSEric Cheng if (port->lp_hwgh != NULL) { 709da14cebeSEric Cheng /* 710da14cebeSEric Cheng * Quiesce the HW ring and the mac srs on the ring. Note 711da14cebeSEric Cheng * that the HW ring will be restarted when the pseudo ring 712da14cebeSEric Cheng * is started. At that time all the packets will be 713da14cebeSEric Cheng * directly passed up to the pseudo RX ring and handled 714da14cebeSEric Cheng * by mac srs created over the pseudo RX ring. 715da14cebeSEric Cheng */ 716da14cebeSEric Cheng mac_rx_client_quiesce(port->lp_mch); 717da14cebeSEric Cheng mac_srs_perm_quiesce(port->lp_mch, B_TRUE); 718da14cebeSEric Cheng } 719da14cebeSEric Cheng 720da14cebeSEric Cheng /* 721da14cebeSEric Cheng * Add all the unicast addresses to the newly added port. 
722da14cebeSEric Cheng */ 723da14cebeSEric Cheng for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) { 724da14cebeSEric Cheng if ((err = aggr_port_addmac(port, addr->aua_addr)) != 0) 725da14cebeSEric Cheng break; 726da14cebeSEric Cheng } 727da14cebeSEric Cheng 728da14cebeSEric Cheng for (i = 0; err == 0 && i < hw_rh_cnt; i++) 729da14cebeSEric Cheng err = aggr_add_pseudo_rx_ring(port, rx_grp, hw_rh[i]); 730da14cebeSEric Cheng 731da14cebeSEric Cheng if (err != 0) { 732da14cebeSEric Cheng for (j = 0; j < i; j++) 733da14cebeSEric Cheng aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[j]); 734da14cebeSEric Cheng 735da14cebeSEric Cheng for (a = rx_grp->arg_macaddr; a != addr; a = a->aua_next) 736da14cebeSEric Cheng aggr_port_remmac(port, a->aua_addr); 737da14cebeSEric Cheng 738da14cebeSEric Cheng if (port->lp_hwgh != NULL) { 739da14cebeSEric Cheng mac_srs_perm_quiesce(port->lp_mch, B_FALSE); 740da14cebeSEric Cheng mac_rx_client_restart(port->lp_mch); 741da14cebeSEric Cheng port->lp_hwgh = NULL; 742da14cebeSEric Cheng } 743da14cebeSEric Cheng } else { 7440dc2366fSVenugopal Iyer port->lp_rx_grp_added = B_TRUE; 745da14cebeSEric Cheng } 746da14cebeSEric Cheng done: 747da14cebeSEric Cheng mac_perim_exit(pmph); 748da14cebeSEric Cheng return (err); 749da14cebeSEric Cheng } 750da14cebeSEric Cheng 751da14cebeSEric Cheng /* 752da14cebeSEric Cheng * This function is called by aggr to remove pseudo RX rings over the 753da14cebeSEric Cheng * HW rings of the underlying port. 
754da14cebeSEric Cheng */ 755da14cebeSEric Cheng static void 756da14cebeSEric Cheng aggr_rem_pseudo_rx_group(aggr_port_t *port, aggr_pseudo_rx_group_t *rx_grp) 757da14cebeSEric Cheng { 758da14cebeSEric Cheng aggr_grp_t *grp = port->lp_grp; 759da14cebeSEric Cheng mac_ring_handle_t hw_rh[MAX_RINGS_PER_GROUP]; 760da14cebeSEric Cheng aggr_unicst_addr_t *addr; 761da14cebeSEric Cheng mac_group_handle_t hwgh; 762da14cebeSEric Cheng mac_perim_handle_t pmph; 763da14cebeSEric Cheng int hw_rh_cnt, i; 764da14cebeSEric Cheng 765da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 766da14cebeSEric Cheng mac_perim_enter_by_mh(port->lp_mh, &pmph); 767da14cebeSEric Cheng 7680dc2366fSVenugopal Iyer if (!port->lp_rx_grp_added) 769da14cebeSEric Cheng goto done; 770da14cebeSEric Cheng 771da14cebeSEric Cheng ASSERT(rx_grp->arg_gh != NULL); 7720dc2366fSVenugopal Iyer hw_rh_cnt = mac_hwrings_get(port->lp_mch, 7730dc2366fSVenugopal Iyer &hwgh, hw_rh, MAC_RING_TYPE_RX); 774da14cebeSEric Cheng 775da14cebeSEric Cheng /* 776da14cebeSEric Cheng * If hw_rh_cnt is 0, it means that the underlying port does not 777da14cebeSEric Cheng * support RX rings. Directly return in this case. 778da14cebeSEric Cheng */ 779da14cebeSEric Cheng for (i = 0; i < hw_rh_cnt; i++) 780da14cebeSEric Cheng aggr_rem_pseudo_rx_ring(rx_grp, hw_rh[i]); 781da14cebeSEric Cheng 782da14cebeSEric Cheng for (addr = rx_grp->arg_macaddr; addr != NULL; addr = addr->aua_next) 783da14cebeSEric Cheng aggr_port_remmac(port, addr->aua_addr); 784da14cebeSEric Cheng 785da14cebeSEric Cheng if (port->lp_hwgh != NULL) { 786da14cebeSEric Cheng port->lp_hwgh = NULL; 787da14cebeSEric Cheng 788da14cebeSEric Cheng /* 789da14cebeSEric Cheng * First clear the permanent-quiesced flag of the RX srs then 790da14cebeSEric Cheng * restart the HW ring and the mac srs on the ring. Note that 791da14cebeSEric Cheng * the HW ring and associated SRS will soon been removed when 792da14cebeSEric Cheng * the port is removed from the aggr. 
793da14cebeSEric Cheng */ 794da14cebeSEric Cheng mac_srs_perm_quiesce(port->lp_mch, B_FALSE); 795da14cebeSEric Cheng mac_rx_client_restart(port->lp_mch); 796da14cebeSEric Cheng } 797da14cebeSEric Cheng 7980dc2366fSVenugopal Iyer port->lp_rx_grp_added = B_FALSE; 7990dc2366fSVenugopal Iyer done: 8000dc2366fSVenugopal Iyer mac_perim_exit(pmph); 8010dc2366fSVenugopal Iyer } 8020dc2366fSVenugopal Iyer 8030dc2366fSVenugopal Iyer /* 8040dc2366fSVenugopal Iyer * Add a pseudo TX ring for the given HW ring handle. 8050dc2366fSVenugopal Iyer */ 8060dc2366fSVenugopal Iyer static int 8070dc2366fSVenugopal Iyer aggr_add_pseudo_tx_ring(aggr_port_t *port, 8080dc2366fSVenugopal Iyer aggr_pseudo_tx_group_t *tx_grp, mac_ring_handle_t hw_rh, 8090dc2366fSVenugopal Iyer mac_ring_handle_t *pseudo_rh) 8100dc2366fSVenugopal Iyer { 8110dc2366fSVenugopal Iyer aggr_pseudo_tx_ring_t *ring; 8120dc2366fSVenugopal Iyer int err; 8130dc2366fSVenugopal Iyer int i; 8140dc2366fSVenugopal Iyer 8150dc2366fSVenugopal Iyer ASSERT(MAC_PERIM_HELD(port->lp_mh)); 8160dc2366fSVenugopal Iyer for (i = 0; i < MAX_RINGS_PER_GROUP; i++) { 8170dc2366fSVenugopal Iyer ring = tx_grp->atg_rings + i; 8180dc2366fSVenugopal Iyer if (!(ring->atr_flags & MAC_PSEUDO_RING_INUSE)) 8190dc2366fSVenugopal Iyer break; 8200dc2366fSVenugopal Iyer } 8210dc2366fSVenugopal Iyer /* 8220dc2366fSVenugopal Iyer * No slot for this new TX ring. 8230dc2366fSVenugopal Iyer */ 8240dc2366fSVenugopal Iyer if (i == MAX_RINGS_PER_GROUP) 8250dc2366fSVenugopal Iyer return (EIO); 8260dc2366fSVenugopal Iyer /* 8270dc2366fSVenugopal Iyer * The following 4 statements needs to be done before 8280dc2366fSVenugopal Iyer * calling mac_group_add_ring(). Otherwise it will 8290dc2366fSVenugopal Iyer * result in an assertion failure in mac_init_ring(). 
8300dc2366fSVenugopal Iyer */ 8310dc2366fSVenugopal Iyer ring->atr_flags |= MAC_PSEUDO_RING_INUSE; 8320dc2366fSVenugopal Iyer ring->atr_hw_rh = hw_rh; 8330dc2366fSVenugopal Iyer ring->atr_port = port; 8340dc2366fSVenugopal Iyer tx_grp->atg_ring_cnt++; 8350dc2366fSVenugopal Iyer 8360dc2366fSVenugopal Iyer /* 8370dc2366fSVenugopal Iyer * The TX side has no concept of ring groups unlike RX groups. 8380dc2366fSVenugopal Iyer * There is just a single group which stores all the TX rings. 8390dc2366fSVenugopal Iyer * This group will be used to store aggr's pseudo TX rings. 8400dc2366fSVenugopal Iyer */ 8410dc2366fSVenugopal Iyer if ((err = mac_group_add_ring(tx_grp->atg_gh, i)) != 0) { 8420dc2366fSVenugopal Iyer ring->atr_flags &= ~MAC_PSEUDO_RING_INUSE; 8430dc2366fSVenugopal Iyer ring->atr_hw_rh = NULL; 8440dc2366fSVenugopal Iyer ring->atr_port = NULL; 8450dc2366fSVenugopal Iyer tx_grp->atg_ring_cnt--; 8460dc2366fSVenugopal Iyer } else { 8470dc2366fSVenugopal Iyer *pseudo_rh = mac_find_ring(tx_grp->atg_gh, i); 8480dc2366fSVenugopal Iyer if (hw_rh != NULL) { 8490dc2366fSVenugopal Iyer mac_hwring_setup(hw_rh, (mac_resource_handle_t)ring, 8500dc2366fSVenugopal Iyer mac_find_ring(tx_grp->atg_gh, i)); 8510dc2366fSVenugopal Iyer } 8520dc2366fSVenugopal Iyer } 853*09b7f21aSRobert Mustacchi 8540dc2366fSVenugopal Iyer return (err); 8550dc2366fSVenugopal Iyer } 8560dc2366fSVenugopal Iyer 8570dc2366fSVenugopal Iyer /* 8580dc2366fSVenugopal Iyer * Remove the pseudo TX ring of the given HW ring handle. 
8590dc2366fSVenugopal Iyer */ 8600dc2366fSVenugopal Iyer static void 8610dc2366fSVenugopal Iyer aggr_rem_pseudo_tx_ring(aggr_pseudo_tx_group_t *tx_grp, 8620dc2366fSVenugopal Iyer mac_ring_handle_t pseudo_hw_rh) 8630dc2366fSVenugopal Iyer { 8640dc2366fSVenugopal Iyer aggr_pseudo_tx_ring_t *ring; 8650dc2366fSVenugopal Iyer int i; 8660dc2366fSVenugopal Iyer 8670dc2366fSVenugopal Iyer for (i = 0; i < MAX_RINGS_PER_GROUP; i++) { 8680dc2366fSVenugopal Iyer ring = tx_grp->atg_rings + i; 8690dc2366fSVenugopal Iyer if (ring->atr_rh != pseudo_hw_rh) 8700dc2366fSVenugopal Iyer continue; 8710dc2366fSVenugopal Iyer 8720dc2366fSVenugopal Iyer ASSERT(ring->atr_flags & MAC_PSEUDO_RING_INUSE); 8730dc2366fSVenugopal Iyer mac_group_rem_ring(tx_grp->atg_gh, pseudo_hw_rh); 8740dc2366fSVenugopal Iyer ring->atr_flags &= ~MAC_PSEUDO_RING_INUSE; 8750dc2366fSVenugopal Iyer mac_hwring_teardown(ring->atr_hw_rh); 8760dc2366fSVenugopal Iyer ring->atr_hw_rh = NULL; 8770dc2366fSVenugopal Iyer ring->atr_port = NULL; 8780dc2366fSVenugopal Iyer tx_grp->atg_ring_cnt--; 8790dc2366fSVenugopal Iyer break; 8800dc2366fSVenugopal Iyer } 8810dc2366fSVenugopal Iyer } 8820dc2366fSVenugopal Iyer 8830dc2366fSVenugopal Iyer /* 8840dc2366fSVenugopal Iyer * This function is called to create pseudo rings over hardware rings of 8850dc2366fSVenugopal Iyer * the underlying device. There is a 1:1 mapping between the pseudo TX 8860dc2366fSVenugopal Iyer * rings of the aggr and the hardware rings of the underlying port. 
8870dc2366fSVenugopal Iyer */ 8880dc2366fSVenugopal Iyer static int 8890dc2366fSVenugopal Iyer aggr_add_pseudo_tx_group(aggr_port_t *port, aggr_pseudo_tx_group_t *tx_grp) 8900dc2366fSVenugopal Iyer { 8910dc2366fSVenugopal Iyer aggr_grp_t *grp = port->lp_grp; 8920dc2366fSVenugopal Iyer mac_ring_handle_t hw_rh[MAX_RINGS_PER_GROUP], pseudo_rh; 8930dc2366fSVenugopal Iyer mac_perim_handle_t pmph; 8940dc2366fSVenugopal Iyer int hw_rh_cnt, i = 0, j; 8950dc2366fSVenugopal Iyer int err = 0; 8960dc2366fSVenugopal Iyer 8970dc2366fSVenugopal Iyer ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 8980dc2366fSVenugopal Iyer mac_perim_enter_by_mh(port->lp_mh, &pmph); 8990dc2366fSVenugopal Iyer 9000dc2366fSVenugopal Iyer /* 9010dc2366fSVenugopal Iyer * Get the list the the underlying HW rings. 9020dc2366fSVenugopal Iyer */ 9030dc2366fSVenugopal Iyer hw_rh_cnt = mac_hwrings_get(port->lp_mch, 9040dc2366fSVenugopal Iyer NULL, hw_rh, MAC_RING_TYPE_TX); 9050dc2366fSVenugopal Iyer 9060dc2366fSVenugopal Iyer /* 9070dc2366fSVenugopal Iyer * Even if the underlying NIC does not have TX rings, we 9080dc2366fSVenugopal Iyer * still make a psuedo TX ring for that NIC with NULL as 9090dc2366fSVenugopal Iyer * the ring handle. 
9100dc2366fSVenugopal Iyer */ 9110dc2366fSVenugopal Iyer if (hw_rh_cnt == 0) 9120dc2366fSVenugopal Iyer port->lp_tx_ring_cnt = 1; 9130dc2366fSVenugopal Iyer else 9140dc2366fSVenugopal Iyer port->lp_tx_ring_cnt = hw_rh_cnt; 9150dc2366fSVenugopal Iyer 9160dc2366fSVenugopal Iyer port->lp_tx_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) * 9170dc2366fSVenugopal Iyer port->lp_tx_ring_cnt), KM_SLEEP); 9180dc2366fSVenugopal Iyer port->lp_pseudo_tx_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) * 9190dc2366fSVenugopal Iyer port->lp_tx_ring_cnt), KM_SLEEP); 9200dc2366fSVenugopal Iyer 9210dc2366fSVenugopal Iyer if (hw_rh_cnt == 0) { 9220dc2366fSVenugopal Iyer if ((err = aggr_add_pseudo_tx_ring(port, tx_grp, 9230dc2366fSVenugopal Iyer NULL, &pseudo_rh)) == 0) { 9240dc2366fSVenugopal Iyer port->lp_tx_rings[0] = NULL; 9250dc2366fSVenugopal Iyer port->lp_pseudo_tx_rings[0] = pseudo_rh; 9260dc2366fSVenugopal Iyer } 9270dc2366fSVenugopal Iyer } else { 9280dc2366fSVenugopal Iyer for (i = 0; err == 0 && i < hw_rh_cnt; i++) { 9290dc2366fSVenugopal Iyer err = aggr_add_pseudo_tx_ring(port, 9300dc2366fSVenugopal Iyer tx_grp, hw_rh[i], &pseudo_rh); 9310dc2366fSVenugopal Iyer if (err != 0) 9320dc2366fSVenugopal Iyer break; 9330dc2366fSVenugopal Iyer port->lp_tx_rings[i] = hw_rh[i]; 9340dc2366fSVenugopal Iyer port->lp_pseudo_tx_rings[i] = pseudo_rh; 9350dc2366fSVenugopal Iyer } 9360dc2366fSVenugopal Iyer } 9370dc2366fSVenugopal Iyer 9380dc2366fSVenugopal Iyer if (err != 0) { 9390dc2366fSVenugopal Iyer if (hw_rh_cnt != 0) { 9400dc2366fSVenugopal Iyer for (j = 0; j < i; j++) { 9410dc2366fSVenugopal Iyer aggr_rem_pseudo_tx_ring(tx_grp, 9420dc2366fSVenugopal Iyer port->lp_pseudo_tx_rings[j]); 9430dc2366fSVenugopal Iyer } 9440dc2366fSVenugopal Iyer } 9450dc2366fSVenugopal Iyer kmem_free(port->lp_tx_rings, 9460dc2366fSVenugopal Iyer (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt)); 9470dc2366fSVenugopal Iyer kmem_free(port->lp_pseudo_tx_rings, 9480dc2366fSVenugopal Iyer (sizeof 
(mac_ring_handle_t *) * port->lp_tx_ring_cnt)); 9490dc2366fSVenugopal Iyer port->lp_tx_ring_cnt = 0; 9500dc2366fSVenugopal Iyer } else { 9510dc2366fSVenugopal Iyer port->lp_tx_grp_added = B_TRUE; 9520dc2366fSVenugopal Iyer port->lp_tx_notify_mh = mac_client_tx_notify(port->lp_mch, 9530dc2366fSVenugopal Iyer aggr_tx_ring_update, port); 9540dc2366fSVenugopal Iyer } 9550dc2366fSVenugopal Iyer mac_perim_exit(pmph); 956*09b7f21aSRobert Mustacchi aggr_grp_update_default(grp); 9570dc2366fSVenugopal Iyer return (err); 9580dc2366fSVenugopal Iyer } 9590dc2366fSVenugopal Iyer 9600dc2366fSVenugopal Iyer /* 9610dc2366fSVenugopal Iyer * This function is called by aggr to remove pseudo TX rings over the 9620dc2366fSVenugopal Iyer * HW rings of the underlying port. 9630dc2366fSVenugopal Iyer */ 9640dc2366fSVenugopal Iyer static void 9650dc2366fSVenugopal Iyer aggr_rem_pseudo_tx_group(aggr_port_t *port, aggr_pseudo_tx_group_t *tx_grp) 9660dc2366fSVenugopal Iyer { 9670dc2366fSVenugopal Iyer aggr_grp_t *grp = port->lp_grp; 9680dc2366fSVenugopal Iyer mac_perim_handle_t pmph; 9690dc2366fSVenugopal Iyer int i; 9700dc2366fSVenugopal Iyer 9710dc2366fSVenugopal Iyer ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 9720dc2366fSVenugopal Iyer mac_perim_enter_by_mh(port->lp_mh, &pmph); 9730dc2366fSVenugopal Iyer 9740dc2366fSVenugopal Iyer if (!port->lp_tx_grp_added) 9750dc2366fSVenugopal Iyer goto done; 9760dc2366fSVenugopal Iyer 9770dc2366fSVenugopal Iyer ASSERT(tx_grp->atg_gh != NULL); 9780dc2366fSVenugopal Iyer 9790dc2366fSVenugopal Iyer for (i = 0; i < port->lp_tx_ring_cnt; i++) 9800dc2366fSVenugopal Iyer aggr_rem_pseudo_tx_ring(tx_grp, port->lp_pseudo_tx_rings[i]); 9810dc2366fSVenugopal Iyer 9820dc2366fSVenugopal Iyer kmem_free(port->lp_tx_rings, 9830dc2366fSVenugopal Iyer (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt)); 9840dc2366fSVenugopal Iyer kmem_free(port->lp_pseudo_tx_rings, 9850dc2366fSVenugopal Iyer (sizeof (mac_ring_handle_t *) * port->lp_tx_ring_cnt)); 9860dc2366fSVenugopal Iyer 
9870dc2366fSVenugopal Iyer port->lp_tx_ring_cnt = 0; 9880dc2366fSVenugopal Iyer (void) mac_client_tx_notify(port->lp_mch, NULL, port->lp_tx_notify_mh); 9890dc2366fSVenugopal Iyer port->lp_tx_grp_added = B_FALSE; 990*09b7f21aSRobert Mustacchi aggr_grp_update_default(grp); 991da14cebeSEric Cheng done: 992da14cebeSEric Cheng mac_perim_exit(pmph); 993da14cebeSEric Cheng } 994da14cebeSEric Cheng 995da14cebeSEric Cheng static int 996da14cebeSEric Cheng aggr_pseudo_disable_intr(mac_intr_handle_t ih) 997da14cebeSEric Cheng { 998da14cebeSEric Cheng aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih; 999da14cebeSEric Cheng return (mac_hwring_disable_intr(rr_ring->arr_hw_rh)); 1000da14cebeSEric Cheng } 1001da14cebeSEric Cheng 1002da14cebeSEric Cheng static int 1003da14cebeSEric Cheng aggr_pseudo_enable_intr(mac_intr_handle_t ih) 1004da14cebeSEric Cheng { 1005da14cebeSEric Cheng aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)ih; 1006da14cebeSEric Cheng return (mac_hwring_enable_intr(rr_ring->arr_hw_rh)); 1007da14cebeSEric Cheng } 1008da14cebeSEric Cheng 1009da14cebeSEric Cheng static int 1010da14cebeSEric Cheng aggr_pseudo_start_ring(mac_ring_driver_t arg, uint64_t mr_gen) 1011da14cebeSEric Cheng { 1012da14cebeSEric Cheng aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)arg; 1013da14cebeSEric Cheng int err; 1014da14cebeSEric Cheng 1015da14cebeSEric Cheng err = mac_hwring_start(rr_ring->arr_hw_rh); 1016da14cebeSEric Cheng if (err == 0) 1017da14cebeSEric Cheng rr_ring->arr_gen = mr_gen; 1018da14cebeSEric Cheng return (err); 1019da14cebeSEric Cheng } 1020da14cebeSEric Cheng 1021da14cebeSEric Cheng static void 1022da14cebeSEric Cheng aggr_pseudo_stop_ring(mac_ring_driver_t arg) 1023da14cebeSEric Cheng { 1024da14cebeSEric Cheng aggr_pseudo_rx_ring_t *rr_ring = (aggr_pseudo_rx_ring_t *)arg; 1025da14cebeSEric Cheng mac_hwring_stop(rr_ring->arr_hw_rh); 1026da14cebeSEric Cheng } 1027da14cebeSEric Cheng 1028da14cebeSEric Cheng /* 
10297c478bd9Sstevel@tonic-gate * Add one or more ports to an existing link aggregation group. 10307c478bd9Sstevel@tonic-gate */ 10317c478bd9Sstevel@tonic-gate int 1032d62bc4baSyz147064 aggr_grp_add_ports(datalink_id_t linkid, uint_t nports, boolean_t force, 1033d62bc4baSyz147064 laioc_port_t *ports) 10347c478bd9Sstevel@tonic-gate { 10357c478bd9Sstevel@tonic-gate int rc, i, nadded = 0; 10367c478bd9Sstevel@tonic-gate aggr_grp_t *grp = NULL; 10377c478bd9Sstevel@tonic-gate aggr_port_t *port; 1038c615009fSyz147064 boolean_t link_state_changed = B_FALSE; 1039da14cebeSEric Cheng mac_perim_handle_t mph, pmph; 10407c478bd9Sstevel@tonic-gate 1041d62bc4baSyz147064 /* get group corresponding to linkid */ 1042210db224Sericheng rw_enter(&aggr_grp_lock, RW_READER); 1043d62bc4baSyz147064 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 1044210db224Sericheng (mod_hash_val_t *)&grp) != 0) { 1045210db224Sericheng rw_exit(&aggr_grp_lock); 1046210db224Sericheng return (ENOENT); 10477c478bd9Sstevel@tonic-gate } 10487c478bd9Sstevel@tonic-gate AGGR_GRP_REFHOLD(grp); 10497c478bd9Sstevel@tonic-gate 1050da14cebeSEric Cheng /* 1051da14cebeSEric Cheng * Hold the perimeter so that the aggregation won't be destroyed. 
1052da14cebeSEric Cheng */ 1053da14cebeSEric Cheng mac_perim_enter_by_mh(grp->lg_mh, &mph); 1054da14cebeSEric Cheng rw_exit(&aggr_grp_lock); 10557c478bd9Sstevel@tonic-gate 10567c478bd9Sstevel@tonic-gate /* add the specified ports to group */ 10577c478bd9Sstevel@tonic-gate for (i = 0; i < nports; i++) { 10587c478bd9Sstevel@tonic-gate /* add port to group */ 1059d62bc4baSyz147064 if ((rc = aggr_grp_add_port(grp, ports[i].lp_linkid, 1060d62bc4baSyz147064 force, &port)) != 0) { 10617c478bd9Sstevel@tonic-gate goto bail; 1062ba2e4443Sseb } 10637c478bd9Sstevel@tonic-gate ASSERT(port != NULL); 10647c478bd9Sstevel@tonic-gate nadded++; 10657c478bd9Sstevel@tonic-gate 10667c478bd9Sstevel@tonic-gate /* check capabilities */ 1067f4420ae7Snd99603 if (!aggr_grp_capab_check(grp, port) || 1068d62bc4baSyz147064 !aggr_grp_sdu_check(grp, port) || 1069d62bc4baSyz147064 !aggr_grp_margin_check(grp, port)) { 10707c478bd9Sstevel@tonic-gate rc = ENOTSUP; 10717c478bd9Sstevel@tonic-gate goto bail; 10727c478bd9Sstevel@tonic-gate } 10737c478bd9Sstevel@tonic-gate 1074da14cebeSEric Cheng /* 1075da14cebeSEric Cheng * Create the pseudo ring for each HW ring of the underlying 1076da14cebeSEric Cheng * port. 
1077da14cebeSEric Cheng */ 10780dc2366fSVenugopal Iyer rc = aggr_add_pseudo_tx_group(port, &grp->lg_tx_group); 10790dc2366fSVenugopal Iyer if (rc != 0) 10800dc2366fSVenugopal Iyer goto bail; 1081da14cebeSEric Cheng rc = aggr_add_pseudo_rx_group(port, &grp->lg_rx_group); 1082da14cebeSEric Cheng if (rc != 0) 1083da14cebeSEric Cheng goto bail; 1084da14cebeSEric Cheng 1085da14cebeSEric Cheng mac_perim_enter_by_mh(port->lp_mh, &pmph); 1086da14cebeSEric Cheng 1087da14cebeSEric Cheng /* set LACP mode */ 1088da14cebeSEric Cheng aggr_port_lacp_set_mode(grp, port); 1089da14cebeSEric Cheng 10907c478bd9Sstevel@tonic-gate /* start port if group has already been started */ 10917c478bd9Sstevel@tonic-gate if (grp->lg_started) { 10927c478bd9Sstevel@tonic-gate rc = aggr_port_start(port); 10937c478bd9Sstevel@tonic-gate if (rc != 0) { 1094da14cebeSEric Cheng mac_perim_exit(pmph); 10957c478bd9Sstevel@tonic-gate goto bail; 10967c478bd9Sstevel@tonic-gate } 10977c478bd9Sstevel@tonic-gate 1098da14cebeSEric Cheng /* 1099da14cebeSEric Cheng * Turn on the promiscuous mode over the port when it 1100da14cebeSEric Cheng * is requested to be turned on to receive the 1101da14cebeSEric Cheng * non-primary address over a port, or the promiscous 1102da14cebeSEric Cheng * mode is enabled over the aggr. 1103da14cebeSEric Cheng */ 1104da14cebeSEric Cheng if (grp->lg_promisc || port->lp_prom_addr != NULL) { 1105da14cebeSEric Cheng rc = aggr_port_promisc(port, B_TRUE); 11067c478bd9Sstevel@tonic-gate if (rc != 0) { 1107da14cebeSEric Cheng mac_perim_exit(pmph); 11087c478bd9Sstevel@tonic-gate goto bail; 11097c478bd9Sstevel@tonic-gate } 11107c478bd9Sstevel@tonic-gate } 1111da14cebeSEric Cheng } 1112da14cebeSEric Cheng mac_perim_exit(pmph); 1113c615009fSyz147064 1114c615009fSyz147064 /* 1115c615009fSyz147064 * Attach each port if necessary. 
1116c615009fSyz147064 */ 1117da14cebeSEric Cheng if (aggr_port_notify_link(grp, port)) 1118392b1d6eSyz147064 link_state_changed = B_TRUE; 1119da14cebeSEric Cheng 1120da14cebeSEric Cheng /* 1121da14cebeSEric Cheng * Initialize the callback functions for this port. 1122da14cebeSEric Cheng */ 1123da14cebeSEric Cheng aggr_port_init_callbacks(port); 11247c478bd9Sstevel@tonic-gate } 11257c478bd9Sstevel@tonic-gate 11267c478bd9Sstevel@tonic-gate /* update the MAC address of the constituent ports */ 1127392b1d6eSyz147064 if (aggr_grp_update_ports_mac(grp)) 1128392b1d6eSyz147064 link_state_changed = B_TRUE; 1129c615009fSyz147064 1130c615009fSyz147064 if (link_state_changed) 1131ba2e4443Sseb mac_link_update(grp->lg_mh, grp->lg_link_state); 11327c478bd9Sstevel@tonic-gate 11337c478bd9Sstevel@tonic-gate bail: 11347c478bd9Sstevel@tonic-gate if (rc != 0) { 11357c478bd9Sstevel@tonic-gate /* stop and remove ports that have been added */ 1136da14cebeSEric Cheng for (i = 0; i < nadded; i++) { 1137d62bc4baSyz147064 port = aggr_grp_port_lookup(grp, ports[i].lp_linkid); 11387c478bd9Sstevel@tonic-gate ASSERT(port != NULL); 11397c478bd9Sstevel@tonic-gate if (grp->lg_started) { 1140da14cebeSEric Cheng mac_perim_enter_by_mh(port->lp_mh, &pmph); 1141da14cebeSEric Cheng (void) aggr_port_promisc(port, B_FALSE); 11427c478bd9Sstevel@tonic-gate aggr_port_stop(port); 1143da14cebeSEric Cheng mac_perim_exit(pmph); 11447c478bd9Sstevel@tonic-gate } 11450dc2366fSVenugopal Iyer aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group); 1146da14cebeSEric Cheng aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group); 11474deae11aSyz147064 (void) aggr_grp_rem_port(grp, port, NULL, NULL); 11487c478bd9Sstevel@tonic-gate } 11497c478bd9Sstevel@tonic-gate } 11507c478bd9Sstevel@tonic-gate 1151da14cebeSEric Cheng mac_perim_exit(mph); 11527c478bd9Sstevel@tonic-gate AGGR_GRP_REFRELE(grp); 11537c478bd9Sstevel@tonic-gate return (rc); 11547c478bd9Sstevel@tonic-gate } 11557c478bd9Sstevel@tonic-gate 1156da14cebeSEric Cheng static int 
1157da14cebeSEric Cheng aggr_grp_modify_common(aggr_grp_t *grp, uint8_t update_mask, uint32_t policy, 1158da14cebeSEric Cheng boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, 1159da14cebeSEric Cheng aggr_lacp_timer_t lacp_timer) 11607c478bd9Sstevel@tonic-gate { 11617c478bd9Sstevel@tonic-gate boolean_t mac_addr_changed = B_FALSE; 11624deae11aSyz147064 boolean_t link_state_changed = B_FALSE; 1163da14cebeSEric Cheng mac_perim_handle_t pmph; 11647c478bd9Sstevel@tonic-gate 1165da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 11667c478bd9Sstevel@tonic-gate 11677c478bd9Sstevel@tonic-gate /* validate fixed address if specified */ 11687c478bd9Sstevel@tonic-gate if ((update_mask & AGGR_MODIFY_MAC) && mac_fixed && 11697c478bd9Sstevel@tonic-gate ((bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) || 11707c478bd9Sstevel@tonic-gate (mac_addr[0] & 0x01))) { 1171da14cebeSEric Cheng return (EINVAL); 11727c478bd9Sstevel@tonic-gate } 11737c478bd9Sstevel@tonic-gate 11747c478bd9Sstevel@tonic-gate /* update policy if requested */ 11757c478bd9Sstevel@tonic-gate if (update_mask & AGGR_MODIFY_POLICY) 11767c478bd9Sstevel@tonic-gate aggr_send_update_policy(grp, policy); 11777c478bd9Sstevel@tonic-gate 11787c478bd9Sstevel@tonic-gate /* update unicast MAC address if requested */ 11797c478bd9Sstevel@tonic-gate if (update_mask & AGGR_MODIFY_MAC) { 11807c478bd9Sstevel@tonic-gate if (mac_fixed) { 11817c478bd9Sstevel@tonic-gate /* user-supplied MAC address */ 11827c478bd9Sstevel@tonic-gate grp->lg_mac_addr_port = NULL; 11837c478bd9Sstevel@tonic-gate if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) != 0) { 11847c478bd9Sstevel@tonic-gate bcopy(mac_addr, grp->lg_addr, ETHERADDRL); 11857c478bd9Sstevel@tonic-gate mac_addr_changed = B_TRUE; 11867c478bd9Sstevel@tonic-gate } 11877c478bd9Sstevel@tonic-gate } else if (grp->lg_addr_fixed) { 11887c478bd9Sstevel@tonic-gate /* switch from user-supplied to automatic */ 11897c478bd9Sstevel@tonic-gate aggr_port_t *port = grp->lg_ports; 
11907c478bd9Sstevel@tonic-gate 1191da14cebeSEric Cheng mac_perim_enter_by_mh(port->lp_mh, &pmph); 11927c478bd9Sstevel@tonic-gate bcopy(port->lp_addr, grp->lg_addr, ETHERADDRL); 11937c478bd9Sstevel@tonic-gate grp->lg_mac_addr_port = port; 11947c478bd9Sstevel@tonic-gate mac_addr_changed = B_TRUE; 1195da14cebeSEric Cheng mac_perim_exit(pmph); 11967c478bd9Sstevel@tonic-gate } 11977c478bd9Sstevel@tonic-gate grp->lg_addr_fixed = mac_fixed; 11987c478bd9Sstevel@tonic-gate } 11997c478bd9Sstevel@tonic-gate 12007c478bd9Sstevel@tonic-gate if (mac_addr_changed) 12014deae11aSyz147064 link_state_changed = aggr_grp_update_ports_mac(grp); 12027c478bd9Sstevel@tonic-gate 12037c478bd9Sstevel@tonic-gate if (update_mask & AGGR_MODIFY_LACP_MODE) 12047c478bd9Sstevel@tonic-gate aggr_lacp_update_mode(grp, lacp_mode); 12057c478bd9Sstevel@tonic-gate 1206da14cebeSEric Cheng if (update_mask & AGGR_MODIFY_LACP_TIMER) 12077c478bd9Sstevel@tonic-gate aggr_lacp_update_timer(grp, lacp_timer); 12087c478bd9Sstevel@tonic-gate 12094deae11aSyz147064 if (link_state_changed) 1210ba2e4443Sseb mac_link_update(grp->lg_mh, grp->lg_link_state); 12114deae11aSyz147064 1212da14cebeSEric Cheng if (mac_addr_changed) 1213da14cebeSEric Cheng mac_unicst_update(grp->lg_mh, grp->lg_addr); 1214da14cebeSEric Cheng 1215da14cebeSEric Cheng return (0); 12164deae11aSyz147064 } 12174deae11aSyz147064 1218da14cebeSEric Cheng /* 1219da14cebeSEric Cheng * Update properties of an existing link aggregation group. 
1220da14cebeSEric Cheng */ 1221da14cebeSEric Cheng int 1222da14cebeSEric Cheng aggr_grp_modify(datalink_id_t linkid, uint8_t update_mask, uint32_t policy, 1223da14cebeSEric Cheng boolean_t mac_fixed, const uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, 1224da14cebeSEric Cheng aggr_lacp_timer_t lacp_timer) 1225da14cebeSEric Cheng { 1226da14cebeSEric Cheng aggr_grp_t *grp = NULL; 1227da14cebeSEric Cheng mac_perim_handle_t mph; 1228da14cebeSEric Cheng int err; 1229da14cebeSEric Cheng 1230da14cebeSEric Cheng /* get group corresponding to linkid */ 1231da14cebeSEric Cheng rw_enter(&aggr_grp_lock, RW_READER); 1232da14cebeSEric Cheng if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 1233da14cebeSEric Cheng (mod_hash_val_t *)&grp) != 0) { 1234210db224Sericheng rw_exit(&aggr_grp_lock); 1235da14cebeSEric Cheng return (ENOENT); 12367c478bd9Sstevel@tonic-gate } 1237da14cebeSEric Cheng AGGR_GRP_REFHOLD(grp); 12387c478bd9Sstevel@tonic-gate 1239da14cebeSEric Cheng /* 1240da14cebeSEric Cheng * Hold the perimeter so that the aggregation won't be destroyed. 1241da14cebeSEric Cheng */ 1242da14cebeSEric Cheng mac_perim_enter_by_mh(grp->lg_mh, &mph); 1243da14cebeSEric Cheng rw_exit(&aggr_grp_lock); 1244da14cebeSEric Cheng 1245da14cebeSEric Cheng err = aggr_grp_modify_common(grp, update_mask, policy, mac_fixed, 1246da14cebeSEric Cheng mac_addr, lacp_mode, lacp_timer); 1247da14cebeSEric Cheng 1248da14cebeSEric Cheng mac_perim_exit(mph); 12497c478bd9Sstevel@tonic-gate AGGR_GRP_REFRELE(grp); 1250da14cebeSEric Cheng return (err); 12517c478bd9Sstevel@tonic-gate } 12527c478bd9Sstevel@tonic-gate 12537c478bd9Sstevel@tonic-gate /* 12547c478bd9Sstevel@tonic-gate * Create a new link aggregation group upon request from administrator. 12557c478bd9Sstevel@tonic-gate * Returns 0 on success, an errno on failure. 
12567c478bd9Sstevel@tonic-gate */ 12577c478bd9Sstevel@tonic-gate int 1258d62bc4baSyz147064 aggr_grp_create(datalink_id_t linkid, uint32_t key, uint_t nports, 1259d62bc4baSyz147064 laioc_port_t *ports, uint32_t policy, boolean_t mac_fixed, boolean_t force, 12602b24ab6bSSebastien Roy uchar_t *mac_addr, aggr_lacp_mode_t lacp_mode, aggr_lacp_timer_t lacp_timer, 12612b24ab6bSSebastien Roy cred_t *credp) 12627c478bd9Sstevel@tonic-gate { 12637c478bd9Sstevel@tonic-gate aggr_grp_t *grp = NULL; 12647c478bd9Sstevel@tonic-gate aggr_port_t *port; 1265ba2e4443Sseb mac_register_t *mac; 12664deae11aSyz147064 boolean_t link_state_changed; 1267da14cebeSEric Cheng mac_perim_handle_t mph; 12687c478bd9Sstevel@tonic-gate int err; 12697c478bd9Sstevel@tonic-gate int i; 12700dc2366fSVenugopal Iyer kt_did_t tid = 0; 12717c478bd9Sstevel@tonic-gate 12727c478bd9Sstevel@tonic-gate /* need at least one port */ 12737c478bd9Sstevel@tonic-gate if (nports == 0) 12747c478bd9Sstevel@tonic-gate return (EINVAL); 12757c478bd9Sstevel@tonic-gate 1276210db224Sericheng rw_enter(&aggr_grp_lock, RW_WRITER); 12777c478bd9Sstevel@tonic-gate 1278d62bc4baSyz147064 /* does a group with the same linkid already exist? 
*/ 1279d62bc4baSyz147064 err = mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 1280210db224Sericheng (mod_hash_val_t *)&grp); 1281210db224Sericheng if (err == 0) { 1282210db224Sericheng rw_exit(&aggr_grp_lock); 12837c478bd9Sstevel@tonic-gate return (EEXIST); 12847c478bd9Sstevel@tonic-gate } 12857c478bd9Sstevel@tonic-gate 12867c478bd9Sstevel@tonic-gate grp = kmem_cache_alloc(aggr_grp_cache, KM_SLEEP); 12877c478bd9Sstevel@tonic-gate 12887c478bd9Sstevel@tonic-gate grp->lg_refs = 1; 12894deae11aSyz147064 grp->lg_closing = B_FALSE; 1290d62bc4baSyz147064 grp->lg_force = force; 1291d62bc4baSyz147064 grp->lg_linkid = linkid; 12922b24ab6bSSebastien Roy grp->lg_zoneid = crgetzoneid(credp); 12937c478bd9Sstevel@tonic-gate grp->lg_ifspeed = 0; 12947c478bd9Sstevel@tonic-gate grp->lg_link_state = LINK_STATE_UNKNOWN; 12957c478bd9Sstevel@tonic-gate grp->lg_link_duplex = LINK_DUPLEX_UNKNOWN; 12967c478bd9Sstevel@tonic-gate grp->lg_started = B_FALSE; 12977c478bd9Sstevel@tonic-gate grp->lg_promisc = B_FALSE; 1298da14cebeSEric Cheng grp->lg_lacp_done = B_FALSE; 12990dc2366fSVenugopal Iyer grp->lg_tx_notify_done = B_FALSE; 1300da14cebeSEric Cheng grp->lg_lacp_head = grp->lg_lacp_tail = NULL; 1301da14cebeSEric Cheng grp->lg_lacp_rx_thread = thread_create(NULL, 0, 1302da14cebeSEric Cheng aggr_lacp_rx_thread, grp, 0, &p0, TS_RUN, minclsyspri); 13030dc2366fSVenugopal Iyer grp->lg_tx_notify_thread = thread_create(NULL, 0, 13040dc2366fSVenugopal Iyer aggr_tx_notify_thread, grp, 0, &p0, TS_RUN, minclsyspri); 13050dc2366fSVenugopal Iyer grp->lg_tx_blocked_rings = kmem_zalloc((sizeof (mac_ring_handle_t *) * 13060dc2366fSVenugopal Iyer MAX_RINGS_PER_GROUP), KM_SLEEP); 13070dc2366fSVenugopal Iyer grp->lg_tx_blocked_cnt = 0; 1308da14cebeSEric Cheng bzero(&grp->lg_rx_group, sizeof (aggr_pseudo_rx_group_t)); 13090dc2366fSVenugopal Iyer bzero(&grp->lg_tx_group, sizeof (aggr_pseudo_tx_group_t)); 13107c478bd9Sstevel@tonic-gate aggr_lacp_init_grp(grp); 13117c478bd9Sstevel@tonic-gate 
13127c478bd9Sstevel@tonic-gate /* add MAC ports to group */ 13137c478bd9Sstevel@tonic-gate grp->lg_ports = NULL; 13147c478bd9Sstevel@tonic-gate grp->lg_nports = 0; 13157c478bd9Sstevel@tonic-gate grp->lg_nattached_ports = 0; 13167c478bd9Sstevel@tonic-gate grp->lg_ntx_ports = 0; 13177c478bd9Sstevel@tonic-gate 1318d62bc4baSyz147064 /* 1319d62bc4baSyz147064 * If key is not specified by the user, allocate the key. 1320d62bc4baSyz147064 */ 1321d62bc4baSyz147064 if ((key == 0) && ((key = (uint32_t)id_alloc(key_ids)) == 0)) { 1322d62bc4baSyz147064 err = ENOMEM; 1323d62bc4baSyz147064 goto bail; 1324d62bc4baSyz147064 } 1325d62bc4baSyz147064 grp->lg_key = key; 1326d62bc4baSyz147064 13277c478bd9Sstevel@tonic-gate for (i = 0; i < nports; i++) { 1328d62bc4baSyz147064 err = aggr_grp_add_port(grp, ports[i].lp_linkid, force, NULL); 13297c478bd9Sstevel@tonic-gate if (err != 0) 13307c478bd9Sstevel@tonic-gate goto bail; 13317c478bd9Sstevel@tonic-gate } 13327c478bd9Sstevel@tonic-gate 13337c478bd9Sstevel@tonic-gate /* 13347c478bd9Sstevel@tonic-gate * If no explicit MAC address was specified by the administrator, 13357c478bd9Sstevel@tonic-gate * set it to the MAC address of the first port. 
13367c478bd9Sstevel@tonic-gate */ 13377c478bd9Sstevel@tonic-gate grp->lg_addr_fixed = mac_fixed; 13387c478bd9Sstevel@tonic-gate if (grp->lg_addr_fixed) { 13397c478bd9Sstevel@tonic-gate /* validate specified address */ 13407c478bd9Sstevel@tonic-gate if (bcmp(aggr_zero_mac, mac_addr, ETHERADDRL) == 0) { 13417c478bd9Sstevel@tonic-gate err = EINVAL; 13427c478bd9Sstevel@tonic-gate goto bail; 13437c478bd9Sstevel@tonic-gate } 13447c478bd9Sstevel@tonic-gate bcopy(mac_addr, grp->lg_addr, ETHERADDRL); 13457c478bd9Sstevel@tonic-gate } else { 13467c478bd9Sstevel@tonic-gate bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL); 13477c478bd9Sstevel@tonic-gate grp->lg_mac_addr_port = grp->lg_ports; 13487c478bd9Sstevel@tonic-gate } 13497c478bd9Sstevel@tonic-gate 13507c478bd9Sstevel@tonic-gate /* set the initial group capabilities */ 13517c478bd9Sstevel@tonic-gate aggr_grp_capab_set(grp); 13527c478bd9Sstevel@tonic-gate 1353d62bc4baSyz147064 if ((mac = mac_alloc(MAC_VERSION)) == NULL) { 1354d62bc4baSyz147064 err = ENOMEM; 1355ba2e4443Sseb goto bail; 1356d62bc4baSyz147064 } 1357ba2e4443Sseb mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 1358ba2e4443Sseb mac->m_driver = grp; 1359ba2e4443Sseb mac->m_dip = aggr_dip; 1360d62bc4baSyz147064 mac->m_instance = grp->lg_key > AGGR_MAX_KEY ? 
(uint_t)-1 : grp->lg_key; 1361ba2e4443Sseb mac->m_src_addr = grp->lg_addr; 1362ba2e4443Sseb mac->m_callbacks = &aggr_m_callbacks; 1363ba2e4443Sseb mac->m_min_sdu = 0; 1364f4420ae7Snd99603 mac->m_max_sdu = grp->lg_max_sdu = aggr_grp_max_sdu(grp); 1365d62bc4baSyz147064 mac->m_margin = aggr_grp_max_margin(grp); 1366da14cebeSEric Cheng mac->m_v12n = MAC_VIRT_LEVEL1; 1367ba2e4443Sseb err = mac_register(mac, &grp->lg_mh); 1368ba2e4443Sseb mac_free(mac); 1369ba2e4443Sseb if (err != 0) 13707c478bd9Sstevel@tonic-gate goto bail; 13717c478bd9Sstevel@tonic-gate 13722b24ab6bSSebastien Roy err = dls_devnet_create(grp->lg_mh, grp->lg_linkid, crgetzoneid(credp)); 13732b24ab6bSSebastien Roy if (err != 0) { 1374d62bc4baSyz147064 (void) mac_unregister(grp->lg_mh); 1375da14cebeSEric Cheng grp->lg_mh = NULL; 1376d62bc4baSyz147064 goto bail; 1377d62bc4baSyz147064 } 1378d62bc4baSyz147064 1379da14cebeSEric Cheng mac_perim_enter_by_mh(grp->lg_mh, &mph); 1380da14cebeSEric Cheng 1381da14cebeSEric Cheng /* 1382da14cebeSEric Cheng * Update the MAC address of the constituent ports. 1383da14cebeSEric Cheng * None of the port is attached at this time, the link state of the 1384da14cebeSEric Cheng * aggregation will not change. 1385da14cebeSEric Cheng */ 1386da14cebeSEric Cheng link_state_changed = aggr_grp_update_ports_mac(grp); 1387da14cebeSEric Cheng ASSERT(!link_state_changed); 1388da14cebeSEric Cheng 1389da14cebeSEric Cheng /* update outbound load balancing policy */ 1390da14cebeSEric Cheng aggr_send_update_policy(grp, policy); 1391da14cebeSEric Cheng 13927c478bd9Sstevel@tonic-gate /* set LACP mode */ 13937c478bd9Sstevel@tonic-gate aggr_lacp_set_mode(grp, lacp_mode, lacp_timer); 13947c478bd9Sstevel@tonic-gate 1395c615009fSyz147064 /* 1396c615009fSyz147064 * Attach each port if necessary. 
1397c615009fSyz147064 */ 1398392b1d6eSyz147064 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1399da14cebeSEric Cheng /* 1400da14cebeSEric Cheng * Create the pseudo ring for each HW ring of the underlying 1401da14cebeSEric Cheng * port. Note that this is done after the aggr registers the 1402da14cebeSEric Cheng * mac. 1403da14cebeSEric Cheng */ 14040dc2366fSVenugopal Iyer VERIFY(aggr_add_pseudo_tx_group(port, &grp->lg_tx_group) == 0); 1405da14cebeSEric Cheng VERIFY(aggr_add_pseudo_rx_group(port, &grp->lg_rx_group) == 0); 1406da14cebeSEric Cheng if (aggr_port_notify_link(grp, port)) 1407392b1d6eSyz147064 link_state_changed = B_TRUE; 1408da14cebeSEric Cheng 1409da14cebeSEric Cheng /* 1410da14cebeSEric Cheng * Initialize the callback functions for this port. 1411da14cebeSEric Cheng */ 1412da14cebeSEric Cheng aggr_port_init_callbacks(port); 1413392b1d6eSyz147064 } 1414392b1d6eSyz147064 1415392b1d6eSyz147064 if (link_state_changed) 1416392b1d6eSyz147064 mac_link_update(grp->lg_mh, grp->lg_link_state); 1417c615009fSyz147064 14187c478bd9Sstevel@tonic-gate /* add new group to hash table */ 1419d62bc4baSyz147064 err = mod_hash_insert(aggr_grp_hash, GRP_HASH_KEY(linkid), 1420210db224Sericheng (mod_hash_val_t)grp); 14217c478bd9Sstevel@tonic-gate ASSERT(err == 0); 1422210db224Sericheng aggr_grp_cnt++; 14237c478bd9Sstevel@tonic-gate 1424da14cebeSEric Cheng mac_perim_exit(mph); 1425210db224Sericheng rw_exit(&aggr_grp_lock); 14267c478bd9Sstevel@tonic-gate return (0); 14277c478bd9Sstevel@tonic-gate 14287c478bd9Sstevel@tonic-gate bail: 14297c478bd9Sstevel@tonic-gate 14304deae11aSyz147064 grp->lg_closing = B_TRUE; 1431490ed22dSyz147064 14327c478bd9Sstevel@tonic-gate port = grp->lg_ports; 14337c478bd9Sstevel@tonic-gate while (port != NULL) { 1434da14cebeSEric Cheng aggr_port_t *cport; 1435da14cebeSEric Cheng 14367c478bd9Sstevel@tonic-gate cport = port->lp_next; 14377c478bd9Sstevel@tonic-gate aggr_port_delete(port); 14387c478bd9Sstevel@tonic-gate port = cport; 
14397c478bd9Sstevel@tonic-gate } 14407c478bd9Sstevel@tonic-gate 1441da14cebeSEric Cheng /* 1442da14cebeSEric Cheng * Inform the lacp_rx thread to exit. 1443da14cebeSEric Cheng */ 1444da14cebeSEric Cheng mutex_enter(&grp->lg_lacp_lock); 1445da14cebeSEric Cheng grp->lg_lacp_done = B_TRUE; 1446da14cebeSEric Cheng cv_signal(&grp->lg_lacp_cv); 1447da14cebeSEric Cheng while (grp->lg_lacp_rx_thread != NULL) 1448da14cebeSEric Cheng cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock); 1449da14cebeSEric Cheng mutex_exit(&grp->lg_lacp_lock); 14500dc2366fSVenugopal Iyer /* 14510dc2366fSVenugopal Iyer * Inform the tx_notify thread to exit. 14520dc2366fSVenugopal Iyer */ 14530dc2366fSVenugopal Iyer mutex_enter(&grp->lg_tx_flowctl_lock); 14540dc2366fSVenugopal Iyer if (grp->lg_tx_notify_thread != NULL) { 14550dc2366fSVenugopal Iyer tid = grp->lg_tx_notify_thread->t_did; 14560dc2366fSVenugopal Iyer grp->lg_tx_notify_done = B_TRUE; 14570dc2366fSVenugopal Iyer cv_signal(&grp->lg_tx_flowctl_cv); 14580dc2366fSVenugopal Iyer } 14590dc2366fSVenugopal Iyer mutex_exit(&grp->lg_tx_flowctl_lock); 14600dc2366fSVenugopal Iyer if (tid != 0) 14610dc2366fSVenugopal Iyer thread_join(tid); 14627c478bd9Sstevel@tonic-gate 14630dc2366fSVenugopal Iyer kmem_free(grp->lg_tx_blocked_rings, 14640dc2366fSVenugopal Iyer (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP)); 1465210db224Sericheng rw_exit(&aggr_grp_lock); 1466da14cebeSEric Cheng AGGR_GRP_REFRELE(grp); 14677c478bd9Sstevel@tonic-gate return (err); 14687c478bd9Sstevel@tonic-gate } 14697c478bd9Sstevel@tonic-gate 14707c478bd9Sstevel@tonic-gate /* 1471d62bc4baSyz147064 * Return a pointer to the member of a group with specified linkid. 
14727c478bd9Sstevel@tonic-gate */ 14737c478bd9Sstevel@tonic-gate static aggr_port_t * 1474d62bc4baSyz147064 aggr_grp_port_lookup(aggr_grp_t *grp, datalink_id_t linkid) 14757c478bd9Sstevel@tonic-gate { 14767c478bd9Sstevel@tonic-gate aggr_port_t *port; 14777c478bd9Sstevel@tonic-gate 1478da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 14797c478bd9Sstevel@tonic-gate 14807c478bd9Sstevel@tonic-gate for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1481d62bc4baSyz147064 if (port->lp_linkid == linkid) 14827c478bd9Sstevel@tonic-gate break; 14837c478bd9Sstevel@tonic-gate } 14847c478bd9Sstevel@tonic-gate 14857c478bd9Sstevel@tonic-gate return (port); 14867c478bd9Sstevel@tonic-gate } 14877c478bd9Sstevel@tonic-gate 14887c478bd9Sstevel@tonic-gate /* 14897c478bd9Sstevel@tonic-gate * Stop, detach and remove a port from a link aggregation group. 14907c478bd9Sstevel@tonic-gate */ 14917c478bd9Sstevel@tonic-gate static int 14924deae11aSyz147064 aggr_grp_rem_port(aggr_grp_t *grp, aggr_port_t *port, 14934deae11aSyz147064 boolean_t *mac_addr_changedp, boolean_t *link_state_changedp) 14947c478bd9Sstevel@tonic-gate { 14954deae11aSyz147064 int rc = 0; 14967c478bd9Sstevel@tonic-gate aggr_port_t **pport; 14974deae11aSyz147064 boolean_t mac_addr_changed = B_FALSE; 14984deae11aSyz147064 boolean_t link_state_changed = B_FALSE; 1499da14cebeSEric Cheng mac_perim_handle_t mph; 15007c478bd9Sstevel@tonic-gate uint64_t val; 15017c478bd9Sstevel@tonic-gate uint_t i; 1502ba2e4443Sseb uint_t stat; 15037c478bd9Sstevel@tonic-gate 1504da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 15057c478bd9Sstevel@tonic-gate ASSERT(grp->lg_nports > 1); 15064deae11aSyz147064 ASSERT(!grp->lg_closing); 15077c478bd9Sstevel@tonic-gate 15087c478bd9Sstevel@tonic-gate /* unlink port */ 15097c478bd9Sstevel@tonic-gate for (pport = &grp->lg_ports; *pport != port; 15107c478bd9Sstevel@tonic-gate pport = &(*pport)->lp_next) { 15114deae11aSyz147064 if (*pport == NULL) { 15124deae11aSyz147064 rc = ENOENT; 
15134deae11aSyz147064 goto done; 15144deae11aSyz147064 } 15157c478bd9Sstevel@tonic-gate } 15167c478bd9Sstevel@tonic-gate *pport = port->lp_next; 15177c478bd9Sstevel@tonic-gate 1518da14cebeSEric Cheng mac_perim_enter_by_mh(port->lp_mh, &mph); 15197c478bd9Sstevel@tonic-gate 15207c478bd9Sstevel@tonic-gate /* 15217c478bd9Sstevel@tonic-gate * If the MAC address of the port being removed was assigned 15227c478bd9Sstevel@tonic-gate * to the group, update the group MAC address 15237c478bd9Sstevel@tonic-gate * using the MAC address of a different port. 15247c478bd9Sstevel@tonic-gate */ 15257c478bd9Sstevel@tonic-gate if (!grp->lg_addr_fixed && grp->lg_mac_addr_port == port) { 15267c478bd9Sstevel@tonic-gate /* 15277c478bd9Sstevel@tonic-gate * Set the MAC address of the group to the 15287c478bd9Sstevel@tonic-gate * MAC address of its first port. 15297c478bd9Sstevel@tonic-gate */ 15307c478bd9Sstevel@tonic-gate bcopy(grp->lg_ports->lp_addr, grp->lg_addr, ETHERADDRL); 15317c478bd9Sstevel@tonic-gate grp->lg_mac_addr_port = grp->lg_ports; 15324deae11aSyz147064 mac_addr_changed = B_TRUE; 15337c478bd9Sstevel@tonic-gate } 15347c478bd9Sstevel@tonic-gate 1535da14cebeSEric Cheng link_state_changed = aggr_grp_detach_port(grp, port); 15367c478bd9Sstevel@tonic-gate 15377c478bd9Sstevel@tonic-gate /* 1538ba2e4443Sseb * Add the counter statistics of the ports while it was aggregated 1539ba2e4443Sseb * to the group's residual statistics. This is done by obtaining 1540ba2e4443Sseb * the current counter from the underlying MAC then subtracting the 1541ba2e4443Sseb * value of the counter at the moment it was added to the 1542ba2e4443Sseb * aggregation. 
15437c478bd9Sstevel@tonic-gate */ 1544da14cebeSEric Cheng for (i = 0; i < MAC_NSTAT; i++) { 1545ba2e4443Sseb stat = i + MAC_STAT_MIN; 1546ba2e4443Sseb if (!MAC_STAT_ISACOUNTER(stat)) 15477c478bd9Sstevel@tonic-gate continue; 1548ba2e4443Sseb val = aggr_port_stat(port, stat); 15497c478bd9Sstevel@tonic-gate val -= port->lp_stat[i]; 15507c478bd9Sstevel@tonic-gate grp->lg_stat[i] += val; 15517c478bd9Sstevel@tonic-gate } 1552da14cebeSEric Cheng for (i = 0; i < ETHER_NSTAT; i++) { 1553ba2e4443Sseb stat = i + MACTYPE_STAT_MIN; 1554ba2e4443Sseb if (!ETHER_STAT_ISACOUNTER(stat)) 1555ba2e4443Sseb continue; 1556ba2e4443Sseb val = aggr_port_stat(port, stat); 1557ba2e4443Sseb val -= port->lp_ether_stat[i]; 1558ba2e4443Sseb grp->lg_ether_stat[i] += val; 1559ba2e4443Sseb } 15607c478bd9Sstevel@tonic-gate 15617c478bd9Sstevel@tonic-gate grp->lg_nports--; 1562da14cebeSEric Cheng mac_perim_exit(mph); 15637c478bd9Sstevel@tonic-gate 15640dc2366fSVenugopal Iyer aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group); 15657c478bd9Sstevel@tonic-gate aggr_port_delete(port); 15667c478bd9Sstevel@tonic-gate 15677c478bd9Sstevel@tonic-gate /* 15687c478bd9Sstevel@tonic-gate * If the group MAC address has changed, update the MAC address of 1569d62bc4baSyz147064 * the remaining constituent ports according to the new MAC 15707c478bd9Sstevel@tonic-gate * address of the group. 
15717c478bd9Sstevel@tonic-gate */ 1572392b1d6eSyz147064 if (mac_addr_changed && aggr_grp_update_ports_mac(grp)) 1573392b1d6eSyz147064 link_state_changed = B_TRUE; 15747c478bd9Sstevel@tonic-gate 15754deae11aSyz147064 done: 15764deae11aSyz147064 if (mac_addr_changedp != NULL) 15774deae11aSyz147064 *mac_addr_changedp = mac_addr_changed; 15784deae11aSyz147064 if (link_state_changedp != NULL) 15794deae11aSyz147064 *link_state_changedp = link_state_changed; 15807c478bd9Sstevel@tonic-gate 15814deae11aSyz147064 return (rc); 15827c478bd9Sstevel@tonic-gate } 15837c478bd9Sstevel@tonic-gate 15847c478bd9Sstevel@tonic-gate /* 15857c478bd9Sstevel@tonic-gate * Remove one or more ports from an existing link aggregation group. 15867c478bd9Sstevel@tonic-gate */ 15877c478bd9Sstevel@tonic-gate int 1588d62bc4baSyz147064 aggr_grp_rem_ports(datalink_id_t linkid, uint_t nports, laioc_port_t *ports) 15897c478bd9Sstevel@tonic-gate { 15907c478bd9Sstevel@tonic-gate int rc = 0, i; 15917c478bd9Sstevel@tonic-gate aggr_grp_t *grp = NULL; 15927c478bd9Sstevel@tonic-gate aggr_port_t *port; 15934deae11aSyz147064 boolean_t mac_addr_update = B_FALSE, mac_addr_changed; 15944deae11aSyz147064 boolean_t link_state_update = B_FALSE, link_state_changed; 1595da14cebeSEric Cheng mac_perim_handle_t mph, pmph; 15967c478bd9Sstevel@tonic-gate 1597d62bc4baSyz147064 /* get group corresponding to linkid */ 1598210db224Sericheng rw_enter(&aggr_grp_lock, RW_READER); 1599d62bc4baSyz147064 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 1600210db224Sericheng (mod_hash_val_t *)&grp) != 0) { 1601210db224Sericheng rw_exit(&aggr_grp_lock); 1602210db224Sericheng return (ENOENT); 16037c478bd9Sstevel@tonic-gate } 16047c478bd9Sstevel@tonic-gate AGGR_GRP_REFHOLD(grp); 1605210db224Sericheng 1606da14cebeSEric Cheng /* 1607da14cebeSEric Cheng * Hold the perimeter so that the aggregation won't be destroyed. 
1608da14cebeSEric Cheng */ 1609da14cebeSEric Cheng mac_perim_enter_by_mh(grp->lg_mh, &mph); 1610da14cebeSEric Cheng rw_exit(&aggr_grp_lock); 16117c478bd9Sstevel@tonic-gate 16127c478bd9Sstevel@tonic-gate /* we need to keep at least one port per group */ 16137c478bd9Sstevel@tonic-gate if (nports >= grp->lg_nports) { 16147c478bd9Sstevel@tonic-gate rc = EINVAL; 16157c478bd9Sstevel@tonic-gate goto bail; 16167c478bd9Sstevel@tonic-gate } 16177c478bd9Sstevel@tonic-gate 16187c478bd9Sstevel@tonic-gate /* first verify that all the groups are valid */ 16197c478bd9Sstevel@tonic-gate for (i = 0; i < nports; i++) { 1620d62bc4baSyz147064 if (aggr_grp_port_lookup(grp, ports[i].lp_linkid) == NULL) { 16217c478bd9Sstevel@tonic-gate /* port not found */ 16227c478bd9Sstevel@tonic-gate rc = ENOENT; 16237c478bd9Sstevel@tonic-gate goto bail; 16247c478bd9Sstevel@tonic-gate } 16257c478bd9Sstevel@tonic-gate } 16267c478bd9Sstevel@tonic-gate 1627da14cebeSEric Cheng /* clear the promiscous mode for the specified ports */ 1628da14cebeSEric Cheng for (i = 0; i < nports && rc == 0; i++) { 1629da14cebeSEric Cheng /* lookup port */ 1630da14cebeSEric Cheng port = aggr_grp_port_lookup(grp, ports[i].lp_linkid); 1631da14cebeSEric Cheng ASSERT(port != NULL); 1632da14cebeSEric Cheng 1633da14cebeSEric Cheng mac_perim_enter_by_mh(port->lp_mh, &pmph); 1634da14cebeSEric Cheng rc = aggr_port_promisc(port, B_FALSE); 1635da14cebeSEric Cheng mac_perim_exit(pmph); 1636da14cebeSEric Cheng } 1637da14cebeSEric Cheng if (rc != 0) { 1638da14cebeSEric Cheng for (i = 0; i < nports; i++) { 1639da14cebeSEric Cheng port = aggr_grp_port_lookup(grp, 1640da14cebeSEric Cheng ports[i].lp_linkid); 1641da14cebeSEric Cheng ASSERT(port != NULL); 1642da14cebeSEric Cheng 1643da14cebeSEric Cheng /* 1644da14cebeSEric Cheng * Turn the promiscuous mode back on if it is required 1645da14cebeSEric Cheng * to receive the non-primary address over a port, or 1646da14cebeSEric Cheng * the promiscous mode is enabled over the aggr. 
1647da14cebeSEric Cheng */ 1648da14cebeSEric Cheng mac_perim_enter_by_mh(port->lp_mh, &pmph); 1649da14cebeSEric Cheng if (port->lp_started && (grp->lg_promisc || 1650da14cebeSEric Cheng port->lp_prom_addr != NULL)) { 1651da14cebeSEric Cheng (void) aggr_port_promisc(port, B_TRUE); 1652da14cebeSEric Cheng } 1653da14cebeSEric Cheng mac_perim_exit(pmph); 1654da14cebeSEric Cheng } 1655da14cebeSEric Cheng goto bail; 1656da14cebeSEric Cheng } 1657da14cebeSEric Cheng 16587c478bd9Sstevel@tonic-gate /* remove the specified ports from group */ 1659da14cebeSEric Cheng for (i = 0; i < nports; i++) { 16607c478bd9Sstevel@tonic-gate /* lookup port */ 1661d62bc4baSyz147064 port = aggr_grp_port_lookup(grp, ports[i].lp_linkid); 16627c478bd9Sstevel@tonic-gate ASSERT(port != NULL); 16637c478bd9Sstevel@tonic-gate 16647c478bd9Sstevel@tonic-gate /* stop port if group has already been started */ 16657c478bd9Sstevel@tonic-gate if (grp->lg_started) { 1666da14cebeSEric Cheng mac_perim_enter_by_mh(port->lp_mh, &pmph); 16677c478bd9Sstevel@tonic-gate aggr_port_stop(port); 1668da14cebeSEric Cheng mac_perim_exit(pmph); 16697c478bd9Sstevel@tonic-gate } 16707c478bd9Sstevel@tonic-gate 16710dc2366fSVenugopal Iyer /* 16720dc2366fSVenugopal Iyer * aggr_rem_pseudo_tx_group() is not called here. Instead 16730dc2366fSVenugopal Iyer * it is called from inside aggr_grp_rem_port() after the 16740dc2366fSVenugopal Iyer * port has been detached. The reason is that 16750dc2366fSVenugopal Iyer * aggr_rem_pseudo_tx_group() removes one ring at a time 16760dc2366fSVenugopal Iyer * and if there is still traffic going on, then there 16770dc2366fSVenugopal Iyer * is the possibility of aggr_find_tx_ring() returning a 16780dc2366fSVenugopal Iyer * removed ring for transmission. Once the port has been 16790dc2366fSVenugopal Iyer * detached, that port will not be used and 16800dc2366fSVenugopal Iyer * aggr_find_tx_ring() will not return any rings 16810dc2366fSVenugopal Iyer * belonging to it. 
16820dc2366fSVenugopal Iyer */ 1683da14cebeSEric Cheng aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group); 16840dc2366fSVenugopal Iyer 16857c478bd9Sstevel@tonic-gate /* remove port from group */ 16864deae11aSyz147064 rc = aggr_grp_rem_port(grp, port, &mac_addr_changed, 16874deae11aSyz147064 &link_state_changed); 16887c478bd9Sstevel@tonic-gate ASSERT(rc == 0); 16894deae11aSyz147064 mac_addr_update = mac_addr_update || mac_addr_changed; 16904deae11aSyz147064 link_state_update = link_state_update || link_state_changed; 16917c478bd9Sstevel@tonic-gate } 16927c478bd9Sstevel@tonic-gate 16937c478bd9Sstevel@tonic-gate bail: 16944deae11aSyz147064 if (mac_addr_update) 1695ba2e4443Sseb mac_unicst_update(grp->lg_mh, grp->lg_addr); 16964deae11aSyz147064 if (link_state_update) 1697ba2e4443Sseb mac_link_update(grp->lg_mh, grp->lg_link_state); 1698da14cebeSEric Cheng 1699da14cebeSEric Cheng mac_perim_exit(mph); 17007c478bd9Sstevel@tonic-gate AGGR_GRP_REFRELE(grp); 17017c478bd9Sstevel@tonic-gate 17027c478bd9Sstevel@tonic-gate return (rc); 17037c478bd9Sstevel@tonic-gate } 17047c478bd9Sstevel@tonic-gate 17057c478bd9Sstevel@tonic-gate int 17062b24ab6bSSebastien Roy aggr_grp_delete(datalink_id_t linkid, cred_t *cred) 17077c478bd9Sstevel@tonic-gate { 1708210db224Sericheng aggr_grp_t *grp = NULL; 17097c478bd9Sstevel@tonic-gate aggr_port_t *port, *cport; 1710d62bc4baSyz147064 datalink_id_t tmpid; 1711210db224Sericheng mod_hash_val_t val; 1712da14cebeSEric Cheng mac_perim_handle_t mph, pmph; 17130466663dSyz147064 int err; 17140dc2366fSVenugopal Iyer kt_did_t tid = 0; 17157c478bd9Sstevel@tonic-gate 1716210db224Sericheng rw_enter(&aggr_grp_lock, RW_WRITER); 17177c478bd9Sstevel@tonic-gate 1718d62bc4baSyz147064 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 1719210db224Sericheng (mod_hash_val_t *)&grp) != 0) { 1720210db224Sericheng rw_exit(&aggr_grp_lock); 1721210db224Sericheng return (ENOENT); 17227c478bd9Sstevel@tonic-gate } 1723490ed22dSyz147064 1724d62bc4baSyz147064 /* 
1725d62bc4baSyz147064 * Note that dls_devnet_destroy() must be called before lg_lock is 1726d62bc4baSyz147064 * held. Otherwise, it will deadlock if another thread is in 1727d62bc4baSyz147064 * aggr_m_stat() and thus has a kstat_hold() on the kstats that 1728d62bc4baSyz147064 * dls_devnet_destroy() needs to delete. 1729d62bc4baSyz147064 */ 1730da14cebeSEric Cheng if ((err = dls_devnet_destroy(grp->lg_mh, &tmpid, B_TRUE)) != 0) { 1731d62bc4baSyz147064 rw_exit(&aggr_grp_lock); 1732d62bc4baSyz147064 return (err); 1733d62bc4baSyz147064 } 1734d62bc4baSyz147064 ASSERT(linkid == tmpid); 1735d62bc4baSyz147064 17367c478bd9Sstevel@tonic-gate /* 17377c478bd9Sstevel@tonic-gate * Unregister from the MAC service module. Since this can 17387c478bd9Sstevel@tonic-gate * fail if a client hasn't closed the MAC port, we gracefully 17397c478bd9Sstevel@tonic-gate * fail the operation. 17407c478bd9Sstevel@tonic-gate */ 17410466663dSyz147064 if ((err = mac_disable(grp->lg_mh)) != 0) { 17422b24ab6bSSebastien Roy (void) dls_devnet_create(grp->lg_mh, linkid, crgetzoneid(cred)); 1743210db224Sericheng rw_exit(&aggr_grp_lock); 17440466663dSyz147064 return (err); 17457c478bd9Sstevel@tonic-gate } 1746d62bc4baSyz147064 (void) mod_hash_remove(aggr_grp_hash, GRP_HASH_KEY(linkid), &val); 1747210db224Sericheng ASSERT(grp == (aggr_grp_t *)val); 17487c478bd9Sstevel@tonic-gate 1749210db224Sericheng ASSERT(aggr_grp_cnt > 0); 1750210db224Sericheng aggr_grp_cnt--; 1751210db224Sericheng rw_exit(&aggr_grp_lock); 17527c478bd9Sstevel@tonic-gate 1753da14cebeSEric Cheng /* 1754da14cebeSEric Cheng * Inform the lacp_rx thread to exit. 
1755da14cebeSEric Cheng */ 1756da14cebeSEric Cheng mutex_enter(&grp->lg_lacp_lock); 1757da14cebeSEric Cheng grp->lg_lacp_done = B_TRUE; 1758da14cebeSEric Cheng cv_signal(&grp->lg_lacp_cv); 1759da14cebeSEric Cheng while (grp->lg_lacp_rx_thread != NULL) 1760da14cebeSEric Cheng cv_wait(&grp->lg_lacp_cv, &grp->lg_lacp_lock); 1761da14cebeSEric Cheng mutex_exit(&grp->lg_lacp_lock); 17620dc2366fSVenugopal Iyer /* 17630dc2366fSVenugopal Iyer * Inform the tx_notify_thread to exit. 17640dc2366fSVenugopal Iyer */ 17650dc2366fSVenugopal Iyer mutex_enter(&grp->lg_tx_flowctl_lock); 17660dc2366fSVenugopal Iyer if (grp->lg_tx_notify_thread != NULL) { 17670dc2366fSVenugopal Iyer tid = grp->lg_tx_notify_thread->t_did; 17680dc2366fSVenugopal Iyer grp->lg_tx_notify_done = B_TRUE; 17690dc2366fSVenugopal Iyer cv_signal(&grp->lg_tx_flowctl_cv); 17700dc2366fSVenugopal Iyer } 17710dc2366fSVenugopal Iyer mutex_exit(&grp->lg_tx_flowctl_lock); 17720dc2366fSVenugopal Iyer if (tid != 0) 17730dc2366fSVenugopal Iyer thread_join(tid); 1774da14cebeSEric Cheng 1775da14cebeSEric Cheng mac_perim_enter_by_mh(grp->lg_mh, &mph); 1776da14cebeSEric Cheng 1777da14cebeSEric Cheng grp->lg_closing = B_TRUE; 1778da14cebeSEric Cheng /* detach and free MAC ports associated with group */ 1779da14cebeSEric Cheng port = grp->lg_ports; 1780da14cebeSEric Cheng while (port != NULL) { 1781da14cebeSEric Cheng cport = port->lp_next; 1782da14cebeSEric Cheng mac_perim_enter_by_mh(port->lp_mh, &pmph); 1783da14cebeSEric Cheng if (grp->lg_started) 1784da14cebeSEric Cheng aggr_port_stop(port); 1785da14cebeSEric Cheng (void) aggr_grp_detach_port(grp, port); 1786da14cebeSEric Cheng mac_perim_exit(pmph); 17870dc2366fSVenugopal Iyer aggr_rem_pseudo_tx_group(port, &grp->lg_tx_group); 1788da14cebeSEric Cheng aggr_rem_pseudo_rx_group(port, &grp->lg_rx_group); 1789da14cebeSEric Cheng aggr_port_delete(port); 1790da14cebeSEric Cheng port = cport; 1791da14cebeSEric Cheng } 1792da14cebeSEric Cheng 1793da14cebeSEric Cheng 
mac_perim_exit(mph); 1794da14cebeSEric Cheng 17950dc2366fSVenugopal Iyer kmem_free(grp->lg_tx_blocked_rings, 17960dc2366fSVenugopal Iyer (sizeof (mac_ring_handle_t *) * MAX_RINGS_PER_GROUP)); 1797da14cebeSEric Cheng /* 1798da14cebeSEric Cheng * Wait for the port's lacp timer thread and its notification callback 1799da14cebeSEric Cheng * to exit before calling mac_unregister() since both needs to access 1800da14cebeSEric Cheng * the mac perimeter of the grp. 1801da14cebeSEric Cheng */ 1802da14cebeSEric Cheng aggr_grp_port_wait(grp); 1803da14cebeSEric Cheng 1804da14cebeSEric Cheng VERIFY(mac_unregister(grp->lg_mh) == 0); 1805da14cebeSEric Cheng grp->lg_mh = NULL; 1806da14cebeSEric Cheng 1807da14cebeSEric Cheng AGGR_GRP_REFRELE(grp); 18087c478bd9Sstevel@tonic-gate return (0); 18097c478bd9Sstevel@tonic-gate } 18107c478bd9Sstevel@tonic-gate 18117c478bd9Sstevel@tonic-gate void 18127c478bd9Sstevel@tonic-gate aggr_grp_free(aggr_grp_t *grp) 18137c478bd9Sstevel@tonic-gate { 18147c478bd9Sstevel@tonic-gate ASSERT(grp->lg_refs == 0); 1815da14cebeSEric Cheng ASSERT(grp->lg_port_ref == 0); 1816d62bc4baSyz147064 if (grp->lg_key > AGGR_MAX_KEY) { 1817d62bc4baSyz147064 id_free(key_ids, grp->lg_key); 1818d62bc4baSyz147064 grp->lg_key = 0; 1819d62bc4baSyz147064 } 18207c478bd9Sstevel@tonic-gate kmem_cache_free(aggr_grp_cache, grp); 18217c478bd9Sstevel@tonic-gate } 18227c478bd9Sstevel@tonic-gate 1823d62bc4baSyz147064 int 1824d62bc4baSyz147064 aggr_grp_info(datalink_id_t linkid, void *fn_arg, 1825d62bc4baSyz147064 aggr_grp_info_new_grp_fn_t new_grp_fn, 18262b24ab6bSSebastien Roy aggr_grp_info_new_port_fn_t new_port_fn, cred_t *cred) 18277c478bd9Sstevel@tonic-gate { 18287c478bd9Sstevel@tonic-gate aggr_grp_t *grp; 18297c478bd9Sstevel@tonic-gate aggr_port_t *port; 1830da14cebeSEric Cheng mac_perim_handle_t mph, pmph; 1831d62bc4baSyz147064 int rc = 0; 18327c478bd9Sstevel@tonic-gate 18332b24ab6bSSebastien Roy /* 18342b24ab6bSSebastien Roy * Make sure that the aggregation link is visible from 
the caller's 18352b24ab6bSSebastien Roy * zone. 18362b24ab6bSSebastien Roy */ 18372b24ab6bSSebastien Roy if (!dls_devnet_islinkvisible(linkid, crgetzoneid(cred))) 18382b24ab6bSSebastien Roy return (ENOENT); 18392b24ab6bSSebastien Roy 1840d62bc4baSyz147064 rw_enter(&aggr_grp_lock, RW_READER); 18417c478bd9Sstevel@tonic-gate 1842d62bc4baSyz147064 if (mod_hash_find(aggr_grp_hash, GRP_HASH_KEY(linkid), 1843d62bc4baSyz147064 (mod_hash_val_t *)&grp) != 0) { 1844d62bc4baSyz147064 rw_exit(&aggr_grp_lock); 1845d62bc4baSyz147064 return (ENOENT); 1846d62bc4baSyz147064 } 1847da14cebeSEric Cheng AGGR_GRP_REFHOLD(grp); 18487c478bd9Sstevel@tonic-gate 1849da14cebeSEric Cheng mac_perim_enter_by_mh(grp->lg_mh, &mph); 1850da14cebeSEric Cheng rw_exit(&aggr_grp_lock); 18517c478bd9Sstevel@tonic-gate 1852d62bc4baSyz147064 rc = new_grp_fn(fn_arg, grp->lg_linkid, 1853d62bc4baSyz147064 (grp->lg_key > AGGR_MAX_KEY) ? 0 : grp->lg_key, grp->lg_addr, 1854d62bc4baSyz147064 grp->lg_addr_fixed, grp->lg_force, grp->lg_tx_policy, 18557c478bd9Sstevel@tonic-gate grp->lg_nports, grp->lg_lacp_mode, grp->aggr.PeriodicTimer); 18567c478bd9Sstevel@tonic-gate 1857d62bc4baSyz147064 if (rc != 0) 18587c478bd9Sstevel@tonic-gate goto bail; 18597c478bd9Sstevel@tonic-gate 18607c478bd9Sstevel@tonic-gate for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1861da14cebeSEric Cheng mac_perim_enter_by_mh(port->lp_mh, &pmph); 1862d62bc4baSyz147064 rc = new_port_fn(fn_arg, port->lp_linkid, port->lp_addr, 1863d62bc4baSyz147064 port->lp_state, &port->lp_lacp.ActorOperPortState); 1864da14cebeSEric Cheng mac_perim_exit(pmph); 18657c478bd9Sstevel@tonic-gate 1866d62bc4baSyz147064 if (rc != 0) 18677c478bd9Sstevel@tonic-gate goto bail; 18687c478bd9Sstevel@tonic-gate } 18697c478bd9Sstevel@tonic-gate 18707c478bd9Sstevel@tonic-gate bail: 1871da14cebeSEric Cheng mac_perim_exit(mph); 1872da14cebeSEric Cheng AGGR_GRP_REFRELE(grp); 18737c478bd9Sstevel@tonic-gate return (rc); 18747c478bd9Sstevel@tonic-gate } 
18757c478bd9Sstevel@tonic-gate 18767c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 18777c478bd9Sstevel@tonic-gate static void 18787c478bd9Sstevel@tonic-gate aggr_m_ioctl(void *arg, queue_t *q, mblk_t *mp) 18797c478bd9Sstevel@tonic-gate { 18807c478bd9Sstevel@tonic-gate miocnak(q, mp, 0, ENOTSUP); 18817c478bd9Sstevel@tonic-gate } 18827c478bd9Sstevel@tonic-gate 1883ba2e4443Sseb static int 1884ba2e4443Sseb aggr_grp_stat(aggr_grp_t *grp, uint_t stat, uint64_t *val) 1885ba2e4443Sseb { 1886ba2e4443Sseb aggr_port_t *port; 1887ba2e4443Sseb uint_t stat_index; 1888ba2e4443Sseb 1889ba2e4443Sseb /* We only aggregate counter statistics. */ 1890ba2e4443Sseb if (IS_MAC_STAT(stat) && !MAC_STAT_ISACOUNTER(stat) || 1891ba2e4443Sseb IS_MACTYPE_STAT(stat) && !ETHER_STAT_ISACOUNTER(stat)) { 1892ba2e4443Sseb return (ENOTSUP); 1893ba2e4443Sseb } 1894ba2e4443Sseb 1895ba2e4443Sseb /* 1896ba2e4443Sseb * Counter statistics for a group are computed by aggregating the 1897ba2e4443Sseb * counters of the members MACs while they were aggregated, plus 1898ba2e4443Sseb * the residual counter of the group itself, which is updated each 1899ba2e4443Sseb * time a MAC is removed from the group. 1900ba2e4443Sseb */ 1901ba2e4443Sseb *val = 0; 1902ba2e4443Sseb for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1903ba2e4443Sseb /* actual port statistic */ 1904ba2e4443Sseb *val += aggr_port_stat(port, stat); 1905ba2e4443Sseb /* 1906ba2e4443Sseb * minus the port stat when it was added, plus any residual 1907d62bc4baSyz147064 * amount for the group. 
1908ba2e4443Sseb */ 1909ba2e4443Sseb if (IS_MAC_STAT(stat)) { 1910ba2e4443Sseb stat_index = stat - MAC_STAT_MIN; 1911ba2e4443Sseb *val -= port->lp_stat[stat_index]; 1912ba2e4443Sseb *val += grp->lg_stat[stat_index]; 1913ba2e4443Sseb } else if (IS_MACTYPE_STAT(stat)) { 1914ba2e4443Sseb stat_index = stat - MACTYPE_STAT_MIN; 1915ba2e4443Sseb *val -= port->lp_ether_stat[stat_index]; 1916ba2e4443Sseb *val += grp->lg_ether_stat[stat_index]; 1917ba2e4443Sseb } 1918ba2e4443Sseb } 1919ba2e4443Sseb return (0); 1920ba2e4443Sseb } 1921ba2e4443Sseb 19220dc2366fSVenugopal Iyer int 19230dc2366fSVenugopal Iyer aggr_rx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) 19240dc2366fSVenugopal Iyer { 19250dc2366fSVenugopal Iyer aggr_pseudo_rx_ring_t *rx_ring = (aggr_pseudo_rx_ring_t *)rdriver; 19260dc2366fSVenugopal Iyer 19270dc2366fSVenugopal Iyer if (rx_ring->arr_hw_rh != NULL) { 19280dc2366fSVenugopal Iyer *val = mac_pseudo_rx_ring_stat_get(rx_ring->arr_hw_rh, stat); 19290dc2366fSVenugopal Iyer } else { 19300dc2366fSVenugopal Iyer aggr_port_t *port = rx_ring->arr_port; 19310dc2366fSVenugopal Iyer 19320dc2366fSVenugopal Iyer *val = mac_stat_get(port->lp_mh, stat); 19330dc2366fSVenugopal Iyer 19340dc2366fSVenugopal Iyer } 19350dc2366fSVenugopal Iyer return (0); 19360dc2366fSVenugopal Iyer } 19370dc2366fSVenugopal Iyer 19380dc2366fSVenugopal Iyer int 19390dc2366fSVenugopal Iyer aggr_tx_ring_stat(mac_ring_driver_t rdriver, uint_t stat, uint64_t *val) 19400dc2366fSVenugopal Iyer { 19410dc2366fSVenugopal Iyer aggr_pseudo_tx_ring_t *tx_ring = (aggr_pseudo_tx_ring_t *)rdriver; 19420dc2366fSVenugopal Iyer 19430dc2366fSVenugopal Iyer if (tx_ring->atr_hw_rh != NULL) { 19440dc2366fSVenugopal Iyer *val = mac_pseudo_tx_ring_stat_get(tx_ring->atr_hw_rh, stat); 19450dc2366fSVenugopal Iyer } else { 19460dc2366fSVenugopal Iyer aggr_port_t *port = tx_ring->atr_port; 19470dc2366fSVenugopal Iyer 19480dc2366fSVenugopal Iyer *val = mac_stat_get(port->lp_mh, stat); 19490dc2366fSVenugopal 
Iyer } 19500dc2366fSVenugopal Iyer return (0); 19510dc2366fSVenugopal Iyer } 19520dc2366fSVenugopal Iyer 1953ba2e4443Sseb static int 1954ba2e4443Sseb aggr_m_stat(void *arg, uint_t stat, uint64_t *val) 19557c478bd9Sstevel@tonic-gate { 19567c478bd9Sstevel@tonic-gate aggr_grp_t *grp = arg; 1957da14cebeSEric Cheng mac_perim_handle_t mph; 1958ba2e4443Sseb int rval = 0; 19597c478bd9Sstevel@tonic-gate 1960da14cebeSEric Cheng mac_perim_enter_by_mh(grp->lg_mh, &mph); 19617c478bd9Sstevel@tonic-gate 19627c478bd9Sstevel@tonic-gate switch (stat) { 19637c478bd9Sstevel@tonic-gate case MAC_STAT_IFSPEED: 1964ba2e4443Sseb *val = grp->lg_ifspeed; 19657c478bd9Sstevel@tonic-gate break; 1966ba2e4443Sseb 1967ba2e4443Sseb case ETHER_STAT_LINK_DUPLEX: 1968ba2e4443Sseb *val = grp->lg_link_duplex; 19697c478bd9Sstevel@tonic-gate break; 1970ba2e4443Sseb 19717c478bd9Sstevel@tonic-gate default: 19727c478bd9Sstevel@tonic-gate /* 1973ba2e4443Sseb * For all other statistics, we return the aggregated stat 1974ba2e4443Sseb * from the underlying ports. aggr_grp_stat() will set 1975ba2e4443Sseb * rval appropriately if the statistic isn't a counter. 19767c478bd9Sstevel@tonic-gate */ 1977ba2e4443Sseb rval = aggr_grp_stat(grp, stat, val); 19787c478bd9Sstevel@tonic-gate } 19797c478bd9Sstevel@tonic-gate 1980da14cebeSEric Cheng mac_perim_exit(mph); 1981ba2e4443Sseb return (rval); 19827c478bd9Sstevel@tonic-gate } 19837c478bd9Sstevel@tonic-gate 19847c478bd9Sstevel@tonic-gate static int 19857c478bd9Sstevel@tonic-gate aggr_m_start(void *arg) 19867c478bd9Sstevel@tonic-gate { 19877c478bd9Sstevel@tonic-gate aggr_grp_t *grp = arg; 19887c478bd9Sstevel@tonic-gate aggr_port_t *port; 1989da14cebeSEric Cheng mac_perim_handle_t mph, pmph; 19907c478bd9Sstevel@tonic-gate 1991da14cebeSEric Cheng mac_perim_enter_by_mh(grp->lg_mh, &mph); 19927c478bd9Sstevel@tonic-gate 19937c478bd9Sstevel@tonic-gate /* 19947c478bd9Sstevel@tonic-gate * Attempts to start all configured members of the group. 
19957c478bd9Sstevel@tonic-gate * Group members will be attached when their link-up notification 19967c478bd9Sstevel@tonic-gate * is received. 19977c478bd9Sstevel@tonic-gate */ 19987c478bd9Sstevel@tonic-gate for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 1999da14cebeSEric Cheng mac_perim_enter_by_mh(port->lp_mh, &pmph); 20007c478bd9Sstevel@tonic-gate if (aggr_port_start(port) != 0) { 2001da14cebeSEric Cheng mac_perim_exit(pmph); 20027c478bd9Sstevel@tonic-gate continue; 20037c478bd9Sstevel@tonic-gate } 20047c478bd9Sstevel@tonic-gate 2005da14cebeSEric Cheng /* 2006da14cebeSEric Cheng * Turn on the promiscuous mode if it is required to receive 2007da14cebeSEric Cheng * the non-primary address over a port, or the promiscous 2008da14cebeSEric Cheng * mode is enabled over the aggr. 2009da14cebeSEric Cheng */ 2010da14cebeSEric Cheng if (grp->lg_promisc || port->lp_prom_addr != NULL) { 2011da14cebeSEric Cheng if (aggr_port_promisc(port, B_TRUE) != 0) 20127c478bd9Sstevel@tonic-gate aggr_port_stop(port); 2013da14cebeSEric Cheng } 2014da14cebeSEric Cheng mac_perim_exit(pmph); 20157c478bd9Sstevel@tonic-gate } 20167c478bd9Sstevel@tonic-gate 20177c478bd9Sstevel@tonic-gate grp->lg_started = B_TRUE; 20187c478bd9Sstevel@tonic-gate 2019da14cebeSEric Cheng mac_perim_exit(mph); 20207c478bd9Sstevel@tonic-gate return (0); 20217c478bd9Sstevel@tonic-gate } 20227c478bd9Sstevel@tonic-gate 20237c478bd9Sstevel@tonic-gate static void 20247c478bd9Sstevel@tonic-gate aggr_m_stop(void *arg) 20257c478bd9Sstevel@tonic-gate { 20267c478bd9Sstevel@tonic-gate aggr_grp_t *grp = arg; 20277c478bd9Sstevel@tonic-gate aggr_port_t *port; 2028da14cebeSEric Cheng mac_perim_handle_t mph, pmph; 20297c478bd9Sstevel@tonic-gate 2030da14cebeSEric Cheng mac_perim_enter_by_mh(grp->lg_mh, &mph); 20317c478bd9Sstevel@tonic-gate 20327c478bd9Sstevel@tonic-gate for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 2033da14cebeSEric Cheng mac_perim_enter_by_mh(port->lp_mh, &pmph); 2034da14cebeSEric 
Cheng 2035da14cebeSEric Cheng /* reset port promiscuous mode */ 2036da14cebeSEric Cheng (void) aggr_port_promisc(port, B_FALSE); 2037da14cebeSEric Cheng 20387c478bd9Sstevel@tonic-gate aggr_port_stop(port); 2039da14cebeSEric Cheng mac_perim_exit(pmph); 20407c478bd9Sstevel@tonic-gate } 20417c478bd9Sstevel@tonic-gate 20427c478bd9Sstevel@tonic-gate grp->lg_started = B_FALSE; 2043da14cebeSEric Cheng mac_perim_exit(mph); 20447c478bd9Sstevel@tonic-gate } 20457c478bd9Sstevel@tonic-gate 20467c478bd9Sstevel@tonic-gate static int 20477c478bd9Sstevel@tonic-gate aggr_m_promisc(void *arg, boolean_t on) 20487c478bd9Sstevel@tonic-gate { 20497c478bd9Sstevel@tonic-gate aggr_grp_t *grp = arg; 20507c478bd9Sstevel@tonic-gate aggr_port_t *port; 20514deae11aSyz147064 boolean_t link_state_changed = B_FALSE; 2052da14cebeSEric Cheng mac_perim_handle_t mph, pmph; 20537c478bd9Sstevel@tonic-gate 20547c478bd9Sstevel@tonic-gate AGGR_GRP_REFHOLD(grp); 2055da14cebeSEric Cheng mac_perim_enter_by_mh(grp->lg_mh, &mph); 20567c478bd9Sstevel@tonic-gate 20574deae11aSyz147064 ASSERT(!grp->lg_closing); 20584deae11aSyz147064 20597c478bd9Sstevel@tonic-gate if (on == grp->lg_promisc) 20607c478bd9Sstevel@tonic-gate goto bail; 20617c478bd9Sstevel@tonic-gate 20627c478bd9Sstevel@tonic-gate for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 2063da14cebeSEric Cheng int err = 0; 2064da14cebeSEric Cheng 2065da14cebeSEric Cheng mac_perim_enter_by_mh(port->lp_mh, &pmph); 20667c478bd9Sstevel@tonic-gate AGGR_PORT_REFHOLD(port); 2067da14cebeSEric Cheng if (!on && (port->lp_prom_addr == NULL)) 2068da14cebeSEric Cheng err = aggr_port_promisc(port, B_FALSE); 2069da14cebeSEric Cheng else if (on && port->lp_started) 2070da14cebeSEric Cheng err = aggr_port_promisc(port, B_TRUE); 2071da14cebeSEric Cheng 2072da14cebeSEric Cheng if (err != 0) { 2073da14cebeSEric Cheng if (aggr_grp_detach_port(grp, port)) 2074392b1d6eSyz147064 link_state_changed = B_TRUE; 20754deae11aSyz147064 } else { 20764deae11aSyz147064 /* 
20774deae11aSyz147064 * If a port was detached because of a previous 20784deae11aSyz147064 * failure changing the promiscuity, the port 20794deae11aSyz147064 * is reattached when it successfully changes 20804deae11aSyz147064 * the promiscuity now, and this might cause 20814deae11aSyz147064 * the link state of the aggregation to change. 20824deae11aSyz147064 */ 2083392b1d6eSyz147064 if (aggr_grp_attach_port(grp, port)) 2084392b1d6eSyz147064 link_state_changed = B_TRUE; 20854deae11aSyz147064 } 2086da14cebeSEric Cheng mac_perim_exit(pmph); 20877c478bd9Sstevel@tonic-gate AGGR_PORT_REFRELE(port); 20887c478bd9Sstevel@tonic-gate } 20897c478bd9Sstevel@tonic-gate 20907c478bd9Sstevel@tonic-gate grp->lg_promisc = on; 20917c478bd9Sstevel@tonic-gate 20924deae11aSyz147064 if (link_state_changed) 2093ba2e4443Sseb mac_link_update(grp->lg_mh, grp->lg_link_state); 20944deae11aSyz147064 20957c478bd9Sstevel@tonic-gate bail: 2096da14cebeSEric Cheng mac_perim_exit(mph); 20977c478bd9Sstevel@tonic-gate AGGR_GRP_REFRELE(grp); 20987c478bd9Sstevel@tonic-gate 20997c478bd9Sstevel@tonic-gate return (0); 21007c478bd9Sstevel@tonic-gate } 21017c478bd9Sstevel@tonic-gate 2102da14cebeSEric Cheng static void 2103da14cebeSEric Cheng aggr_grp_port_rename(const char *new_name, void *arg) 2104da14cebeSEric Cheng { 2105da14cebeSEric Cheng /* 2106da14cebeSEric Cheng * aggr port's mac client name is the format of "aggr link name" plus 2107da14cebeSEric Cheng * AGGR_PORT_NAME_DELIMIT plus "underneath link name". 
2108da14cebeSEric Cheng */ 2109da14cebeSEric Cheng int aggr_len, link_len, clnt_name_len, i; 2110da14cebeSEric Cheng char *str_end, *str_st, *str_del; 2111da14cebeSEric Cheng char aggr_name[MAXNAMELEN]; 2112da14cebeSEric Cheng char link_name[MAXNAMELEN]; 2113da14cebeSEric Cheng char *clnt_name; 2114da14cebeSEric Cheng aggr_grp_t *aggr_grp = arg; 2115da14cebeSEric Cheng aggr_port_t *aggr_port = aggr_grp->lg_ports; 2116da14cebeSEric Cheng 2117da14cebeSEric Cheng for (i = 0; i < aggr_grp->lg_nports; i++) { 2118da14cebeSEric Cheng clnt_name = mac_client_name(aggr_port->lp_mch); 2119da14cebeSEric Cheng clnt_name_len = strlen(clnt_name); 2120da14cebeSEric Cheng str_st = clnt_name; 2121da14cebeSEric Cheng str_end = &(clnt_name[clnt_name_len]); 2122da14cebeSEric Cheng str_del = strchr(str_st, AGGR_PORT_NAME_DELIMIT); 2123da14cebeSEric Cheng ASSERT(str_del != NULL); 2124da14cebeSEric Cheng aggr_len = (intptr_t)((uintptr_t)str_del - (uintptr_t)str_st); 2125da14cebeSEric Cheng link_len = (intptr_t)((uintptr_t)str_end - (uintptr_t)str_del); 2126da14cebeSEric Cheng bzero(aggr_name, MAXNAMELEN); 2127da14cebeSEric Cheng bzero(link_name, MAXNAMELEN); 2128da14cebeSEric Cheng bcopy(clnt_name, aggr_name, aggr_len); 2129da14cebeSEric Cheng bcopy(str_del, link_name, link_len + 1); 2130da14cebeSEric Cheng bzero(clnt_name, MAXNAMELEN); 2131da14cebeSEric Cheng (void) snprintf(clnt_name, MAXNAMELEN, "%s%s", new_name, 2132da14cebeSEric Cheng link_name); 2133da14cebeSEric Cheng 2134da14cebeSEric Cheng (void) mac_rename_primary(aggr_port->lp_mh, NULL); 2135da14cebeSEric Cheng aggr_port = aggr_port->lp_next; 2136da14cebeSEric Cheng } 2137da14cebeSEric Cheng } 2138da14cebeSEric Cheng 21397c478bd9Sstevel@tonic-gate /* 2140ba2e4443Sseb * Initialize the capabilities that are advertised for the group 2141ba2e4443Sseb * according to the capabilities of the constituent ports. 
2142ba2e4443Sseb */ 2143ba2e4443Sseb static boolean_t 2144ba2e4443Sseb aggr_m_capab_get(void *arg, mac_capab_t cap, void *cap_data) 2145ba2e4443Sseb { 2146ba2e4443Sseb aggr_grp_t *grp = arg; 2147ba2e4443Sseb 2148ba2e4443Sseb switch (cap) { 2149ba2e4443Sseb case MAC_CAPAB_HCKSUM: { 2150ba2e4443Sseb uint32_t *hcksum_txflags = cap_data; 2151ba2e4443Sseb *hcksum_txflags = grp->lg_hcksum_txflags; 2152ba2e4443Sseb break; 2153ba2e4443Sseb } 215419c868a0SRoamer case MAC_CAPAB_LSO: { 215519c868a0SRoamer mac_capab_lso_t *cap_lso = cap_data; 215619c868a0SRoamer 215719c868a0SRoamer if (grp->lg_lso) { 215819c868a0SRoamer *cap_lso = grp->lg_cap_lso; 215919c868a0SRoamer break; 216019c868a0SRoamer } else { 216119c868a0SRoamer return (B_FALSE); 216219c868a0SRoamer } 216319c868a0SRoamer } 2164d62bc4baSyz147064 case MAC_CAPAB_NO_NATIVEVLAN: 2165d62bc4baSyz147064 return (!grp->lg_vlan); 2166d62bc4baSyz147064 case MAC_CAPAB_NO_ZCOPY: 2167d62bc4baSyz147064 return (!grp->lg_zcopy); 2168da14cebeSEric Cheng case MAC_CAPAB_RINGS: { 2169da14cebeSEric Cheng mac_capab_rings_t *cap_rings = cap_data; 2170da14cebeSEric Cheng 2171da14cebeSEric Cheng if (cap_rings->mr_type == MAC_RING_TYPE_RX) { 2172da14cebeSEric Cheng cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; 2173da14cebeSEric Cheng cap_rings->mr_rnum = grp->lg_rx_group.arg_ring_cnt; 2174da14cebeSEric Cheng 2175da14cebeSEric Cheng /* 2176da14cebeSEric Cheng * An aggregation advertises only one (pseudo) RX 2177da14cebeSEric Cheng * group, which virtualizes the main/primary group of 2178da14cebeSEric Cheng * the underlying devices. 
2179da14cebeSEric Cheng */ 2180da14cebeSEric Cheng cap_rings->mr_gnum = 1; 2181da14cebeSEric Cheng cap_rings->mr_gaddring = NULL; 2182da14cebeSEric Cheng cap_rings->mr_gremring = NULL; 2183da14cebeSEric Cheng } else { 21840dc2366fSVenugopal Iyer cap_rings->mr_group_type = MAC_GROUP_TYPE_STATIC; 21850dc2366fSVenugopal Iyer cap_rings->mr_rnum = grp->lg_tx_group.atg_ring_cnt; 21860dc2366fSVenugopal Iyer cap_rings->mr_gnum = 0; 2187da14cebeSEric Cheng } 21880dc2366fSVenugopal Iyer cap_rings->mr_rget = aggr_fill_ring; 21890dc2366fSVenugopal Iyer cap_rings->mr_gget = aggr_fill_group; 2190da14cebeSEric Cheng break; 2191da14cebeSEric Cheng } 2192da14cebeSEric Cheng case MAC_CAPAB_AGGR: 2193da14cebeSEric Cheng { 2194da14cebeSEric Cheng mac_capab_aggr_t *aggr_cap; 2195da14cebeSEric Cheng 2196da14cebeSEric Cheng if (cap_data != NULL) { 2197da14cebeSEric Cheng aggr_cap = cap_data; 2198da14cebeSEric Cheng aggr_cap->mca_rename_fn = aggr_grp_port_rename; 2199da14cebeSEric Cheng aggr_cap->mca_unicst = aggr_m_unicst; 22000dc2366fSVenugopal Iyer aggr_cap->mca_find_tx_ring_fn = aggr_find_tx_ring; 22010dc2366fSVenugopal Iyer aggr_cap->mca_arg = arg; 2202da14cebeSEric Cheng } 2203da14cebeSEric Cheng return (B_TRUE); 2204da14cebeSEric Cheng } 2205ba2e4443Sseb default: 2206ba2e4443Sseb return (B_FALSE); 2207ba2e4443Sseb } 2208ba2e4443Sseb return (B_TRUE); 2209ba2e4443Sseb } 2210ba2e4443Sseb 2211da14cebeSEric Cheng /* 2212da14cebeSEric Cheng * Callback funtion for MAC layer to register groups. 
2213da14cebeSEric Cheng */ 2214da14cebeSEric Cheng static void 2215da14cebeSEric Cheng aggr_fill_group(void *arg, mac_ring_type_t rtype, const int index, 2216da14cebeSEric Cheng mac_group_info_t *infop, mac_group_handle_t gh) 221719599311Sudpa { 2218da14cebeSEric Cheng aggr_grp_t *grp = arg; 2219da14cebeSEric Cheng aggr_pseudo_rx_group_t *rx_group; 22200dc2366fSVenugopal Iyer aggr_pseudo_tx_group_t *tx_group; 222119599311Sudpa 22220dc2366fSVenugopal Iyer ASSERT(index == 0); 22230dc2366fSVenugopal Iyer if (rtype == MAC_RING_TYPE_RX) { 2224da14cebeSEric Cheng rx_group = &grp->lg_rx_group; 2225da14cebeSEric Cheng rx_group->arg_gh = gh; 2226da14cebeSEric Cheng rx_group->arg_grp = grp; 222719599311Sudpa 2228da14cebeSEric Cheng infop->mgi_driver = (mac_group_driver_t)rx_group; 2229da14cebeSEric Cheng infop->mgi_start = NULL; 2230da14cebeSEric Cheng infop->mgi_stop = NULL; 2231da14cebeSEric Cheng infop->mgi_addmac = aggr_addmac; 2232da14cebeSEric Cheng infop->mgi_remmac = aggr_remmac; 2233da14cebeSEric Cheng infop->mgi_count = rx_group->arg_ring_cnt; 22340dc2366fSVenugopal Iyer } else { 22350dc2366fSVenugopal Iyer tx_group = &grp->lg_tx_group; 22360dc2366fSVenugopal Iyer tx_group->atg_gh = gh; 22370dc2366fSVenugopal Iyer } 2238da14cebeSEric Cheng } 2239da14cebeSEric Cheng 2240da14cebeSEric Cheng /* 2241da14cebeSEric Cheng * Callback funtion for MAC layer to register all rings. 
2242da14cebeSEric Cheng */ 2243da14cebeSEric Cheng static void 2244da14cebeSEric Cheng aggr_fill_ring(void *arg, mac_ring_type_t rtype, const int rg_index, 2245da14cebeSEric Cheng const int index, mac_ring_info_t *infop, mac_ring_handle_t rh) 2246da14cebeSEric Cheng { 2247da14cebeSEric Cheng aggr_grp_t *grp = arg; 2248da14cebeSEric Cheng 2249da14cebeSEric Cheng switch (rtype) { 2250da14cebeSEric Cheng case MAC_RING_TYPE_RX: { 2251da14cebeSEric Cheng aggr_pseudo_rx_group_t *rx_group = &grp->lg_rx_group; 2252da14cebeSEric Cheng aggr_pseudo_rx_ring_t *rx_ring; 2253da14cebeSEric Cheng mac_intr_t aggr_mac_intr; 2254da14cebeSEric Cheng 2255da14cebeSEric Cheng ASSERT(rg_index == 0); 2256da14cebeSEric Cheng 2257da14cebeSEric Cheng ASSERT((index >= 0) && (index < rx_group->arg_ring_cnt)); 2258da14cebeSEric Cheng rx_ring = rx_group->arg_rings + index; 2259da14cebeSEric Cheng rx_ring->arr_rh = rh; 2260da14cebeSEric Cheng 2261da14cebeSEric Cheng /* 2262da14cebeSEric Cheng * Entrypoint to enable interrupt (disable poll) and 2263da14cebeSEric Cheng * disable interrupt (enable poll). 
2264da14cebeSEric Cheng */ 2265da14cebeSEric Cheng aggr_mac_intr.mi_handle = (mac_intr_handle_t)rx_ring; 2266da14cebeSEric Cheng aggr_mac_intr.mi_enable = aggr_pseudo_enable_intr; 2267da14cebeSEric Cheng aggr_mac_intr.mi_disable = aggr_pseudo_disable_intr; 22680dc2366fSVenugopal Iyer aggr_mac_intr.mi_ddi_handle = NULL; 2269da14cebeSEric Cheng 2270da14cebeSEric Cheng infop->mri_driver = (mac_ring_driver_t)rx_ring; 2271da14cebeSEric Cheng infop->mri_start = aggr_pseudo_start_ring; 2272da14cebeSEric Cheng infop->mri_stop = aggr_pseudo_stop_ring; 2273da14cebeSEric Cheng 2274da14cebeSEric Cheng infop->mri_intr = aggr_mac_intr; 2275da14cebeSEric Cheng infop->mri_poll = aggr_rx_poll; 22760dc2366fSVenugopal Iyer 22770dc2366fSVenugopal Iyer infop->mri_stat = aggr_rx_ring_stat; 22780dc2366fSVenugopal Iyer break; 22790dc2366fSVenugopal Iyer } 22800dc2366fSVenugopal Iyer case MAC_RING_TYPE_TX: { 22810dc2366fSVenugopal Iyer aggr_pseudo_tx_group_t *tx_group = &grp->lg_tx_group; 22820dc2366fSVenugopal Iyer aggr_pseudo_tx_ring_t *tx_ring; 22830dc2366fSVenugopal Iyer 22840dc2366fSVenugopal Iyer ASSERT(rg_index == -1); 22850dc2366fSVenugopal Iyer ASSERT(index < tx_group->atg_ring_cnt); 22860dc2366fSVenugopal Iyer 22870dc2366fSVenugopal Iyer tx_ring = &tx_group->atg_rings[index]; 22880dc2366fSVenugopal Iyer tx_ring->atr_rh = rh; 22890dc2366fSVenugopal Iyer 22900dc2366fSVenugopal Iyer infop->mri_driver = (mac_ring_driver_t)tx_ring; 22910dc2366fSVenugopal Iyer infop->mri_start = NULL; 22920dc2366fSVenugopal Iyer infop->mri_stop = NULL; 22930dc2366fSVenugopal Iyer infop->mri_tx = aggr_ring_tx; 22940dc2366fSVenugopal Iyer infop->mri_stat = aggr_tx_ring_stat; 22950dc2366fSVenugopal Iyer /* 22960dc2366fSVenugopal Iyer * Use the hw TX ring handle to find if the ring needs 22970dc2366fSVenugopal Iyer * serialization or not. For NICs that do not expose 22980dc2366fSVenugopal Iyer * Tx rings, atr_hw_rh will be NULL. 
22990dc2366fSVenugopal Iyer */ 23000dc2366fSVenugopal Iyer if (tx_ring->atr_hw_rh != NULL) { 23010dc2366fSVenugopal Iyer infop->mri_flags = 23020dc2366fSVenugopal Iyer mac_hwring_getinfo(tx_ring->atr_hw_rh); 23030dc2366fSVenugopal Iyer } 230419599311Sudpa break; 230519599311Sudpa } 2306da14cebeSEric Cheng default: 2307da14cebeSEric Cheng break; 230819599311Sudpa } 2309da14cebeSEric Cheng } 2310da14cebeSEric Cheng 2311da14cebeSEric Cheng static mblk_t * 2312da14cebeSEric Cheng aggr_rx_poll(void *arg, int bytes_to_pickup) 2313da14cebeSEric Cheng { 2314da14cebeSEric Cheng aggr_pseudo_rx_ring_t *rr_ring = arg; 2315da14cebeSEric Cheng aggr_port_t *port = rr_ring->arr_port; 2316da14cebeSEric Cheng aggr_grp_t *grp = port->lp_grp; 2317da14cebeSEric Cheng mblk_t *mp_chain, *mp, **mpp; 2318da14cebeSEric Cheng 2319da14cebeSEric Cheng mp_chain = mac_hwring_poll(rr_ring->arr_hw_rh, bytes_to_pickup); 2320da14cebeSEric Cheng 2321da14cebeSEric Cheng if (grp->lg_lacp_mode == AGGR_LACP_OFF) 2322da14cebeSEric Cheng return (mp_chain); 2323da14cebeSEric Cheng 2324da14cebeSEric Cheng mpp = &mp_chain; 2325da14cebeSEric Cheng while ((mp = *mpp) != NULL) { 2326da14cebeSEric Cheng if (MBLKL(mp) >= sizeof (struct ether_header)) { 2327da14cebeSEric Cheng struct ether_header *ehp; 2328da14cebeSEric Cheng 2329da14cebeSEric Cheng ehp = (struct ether_header *)mp->b_rptr; 2330da14cebeSEric Cheng if (ntohs(ehp->ether_type) == ETHERTYPE_SLOW) { 2331da14cebeSEric Cheng *mpp = mp->b_next; 2332da14cebeSEric Cheng mp->b_next = NULL; 2333da14cebeSEric Cheng aggr_recv_lacp(port, 2334da14cebeSEric Cheng (mac_resource_handle_t)rr_ring, mp); 2335da14cebeSEric Cheng continue; 2336da14cebeSEric Cheng } 2337da14cebeSEric Cheng } 2338da14cebeSEric Cheng 2339da14cebeSEric Cheng if (!port->lp_collector_enabled) { 2340da14cebeSEric Cheng *mpp = mp->b_next; 2341da14cebeSEric Cheng mp->b_next = NULL; 2342da14cebeSEric Cheng freemsg(mp); 2343da14cebeSEric Cheng continue; 2344da14cebeSEric Cheng } 2345da14cebeSEric 
Cheng mpp = &mp->b_next; 2346da14cebeSEric Cheng } 2347da14cebeSEric Cheng return (mp_chain); 2348da14cebeSEric Cheng } 2349da14cebeSEric Cheng 2350da14cebeSEric Cheng static int 2351da14cebeSEric Cheng aggr_addmac(void *arg, const uint8_t *mac_addr) 2352da14cebeSEric Cheng { 2353da14cebeSEric Cheng aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)arg; 2354da14cebeSEric Cheng aggr_unicst_addr_t *addr, **pprev; 2355da14cebeSEric Cheng aggr_grp_t *grp = rx_group->arg_grp; 2356da14cebeSEric Cheng aggr_port_t *port, *p; 2357da14cebeSEric Cheng mac_perim_handle_t mph; 2358da14cebeSEric Cheng int err = 0; 2359da14cebeSEric Cheng 2360da14cebeSEric Cheng mac_perim_enter_by_mh(grp->lg_mh, &mph); 2361da14cebeSEric Cheng 2362da14cebeSEric Cheng if (bcmp(mac_addr, grp->lg_addr, ETHERADDRL) == 0) { 2363da14cebeSEric Cheng mac_perim_exit(mph); 236419599311Sudpa return (0); 236519599311Sudpa } 236619599311Sudpa 2367ba2e4443Sseb /* 2368da14cebeSEric Cheng * Insert this mac address into the list of mac addresses owned by 2369da14cebeSEric Cheng * the aggregation pseudo group. 
2370da14cebeSEric Cheng */ 2371da14cebeSEric Cheng pprev = &rx_group->arg_macaddr; 2372da14cebeSEric Cheng while ((addr = *pprev) != NULL) { 2373da14cebeSEric Cheng if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) == 0) { 2374da14cebeSEric Cheng mac_perim_exit(mph); 2375da14cebeSEric Cheng return (EEXIST); 2376da14cebeSEric Cheng } 2377da14cebeSEric Cheng pprev = &addr->aua_next; 2378da14cebeSEric Cheng } 2379da14cebeSEric Cheng addr = kmem_alloc(sizeof (aggr_unicst_addr_t), KM_SLEEP); 2380da14cebeSEric Cheng bcopy(mac_addr, addr->aua_addr, ETHERADDRL); 2381da14cebeSEric Cheng addr->aua_next = NULL; 2382da14cebeSEric Cheng *pprev = addr; 2383da14cebeSEric Cheng 2384da14cebeSEric Cheng for (port = grp->lg_ports; port != NULL; port = port->lp_next) 2385da14cebeSEric Cheng if ((err = aggr_port_addmac(port, mac_addr)) != 0) 2386da14cebeSEric Cheng break; 2387da14cebeSEric Cheng 2388da14cebeSEric Cheng if (err != 0) { 2389da14cebeSEric Cheng for (p = grp->lg_ports; p != port; p = p->lp_next) 2390da14cebeSEric Cheng aggr_port_remmac(p, mac_addr); 2391da14cebeSEric Cheng 2392da14cebeSEric Cheng *pprev = NULL; 2393da14cebeSEric Cheng kmem_free(addr, sizeof (aggr_unicst_addr_t)); 2394da14cebeSEric Cheng } 2395da14cebeSEric Cheng 2396da14cebeSEric Cheng mac_perim_exit(mph); 2397da14cebeSEric Cheng return (err); 2398da14cebeSEric Cheng } 2399da14cebeSEric Cheng 2400da14cebeSEric Cheng static int 2401da14cebeSEric Cheng aggr_remmac(void *arg, const uint8_t *mac_addr) 2402da14cebeSEric Cheng { 2403da14cebeSEric Cheng aggr_pseudo_rx_group_t *rx_group = (aggr_pseudo_rx_group_t *)arg; 2404da14cebeSEric Cheng aggr_unicst_addr_t *addr, **pprev; 2405da14cebeSEric Cheng aggr_grp_t *grp = rx_group->arg_grp; 2406da14cebeSEric Cheng aggr_port_t *port; 2407da14cebeSEric Cheng mac_perim_handle_t mph; 2408da14cebeSEric Cheng int err = 0; 2409da14cebeSEric Cheng 2410da14cebeSEric Cheng mac_perim_enter_by_mh(grp->lg_mh, &mph); 2411da14cebeSEric Cheng 2412da14cebeSEric Cheng if (bcmp(mac_addr, 
grp->lg_addr, ETHERADDRL) == 0) { 2413da14cebeSEric Cheng mac_perim_exit(mph); 2414da14cebeSEric Cheng return (0); 2415da14cebeSEric Cheng } 2416da14cebeSEric Cheng 2417da14cebeSEric Cheng /* 2418da14cebeSEric Cheng * Insert this mac address into the list of mac addresses owned by 2419da14cebeSEric Cheng * the aggregation pseudo group. 2420da14cebeSEric Cheng */ 2421da14cebeSEric Cheng pprev = &rx_group->arg_macaddr; 2422da14cebeSEric Cheng while ((addr = *pprev) != NULL) { 2423da14cebeSEric Cheng if (bcmp(mac_addr, addr->aua_addr, ETHERADDRL) != 0) { 2424da14cebeSEric Cheng pprev = &addr->aua_next; 2425da14cebeSEric Cheng continue; 2426da14cebeSEric Cheng } 2427da14cebeSEric Cheng break; 2428da14cebeSEric Cheng } 2429da14cebeSEric Cheng if (addr == NULL) { 2430da14cebeSEric Cheng mac_perim_exit(mph); 2431da14cebeSEric Cheng return (EINVAL); 2432da14cebeSEric Cheng } 2433da14cebeSEric Cheng 2434da14cebeSEric Cheng for (port = grp->lg_ports; port != NULL; port = port->lp_next) 2435da14cebeSEric Cheng aggr_port_remmac(port, mac_addr); 2436da14cebeSEric Cheng 2437da14cebeSEric Cheng *pprev = addr->aua_next; 2438da14cebeSEric Cheng kmem_free(addr, sizeof (aggr_unicst_addr_t)); 2439da14cebeSEric Cheng 2440da14cebeSEric Cheng mac_perim_exit(mph); 2441da14cebeSEric Cheng return (err); 2442da14cebeSEric Cheng } 2443da14cebeSEric Cheng 2444da14cebeSEric Cheng /* 24457c478bd9Sstevel@tonic-gate * Add or remove the multicast addresses that are defined for the group 24467c478bd9Sstevel@tonic-gate * to or from the specified port. 2447ae6aa22aSVenugopal Iyer * 2448ae6aa22aSVenugopal Iyer * Note that aggr_grp_multicst_port(..., B_TRUE) is called when the port 2449ae6aa22aSVenugopal Iyer * is started and attached, and aggr_grp_multicst_port(..., B_FALSE) is 2450ae6aa22aSVenugopal Iyer * called when the port is either stopped or detached. 
24517c478bd9Sstevel@tonic-gate */ 24527c478bd9Sstevel@tonic-gate void 24537c478bd9Sstevel@tonic-gate aggr_grp_multicst_port(aggr_port_t *port, boolean_t add) 24547c478bd9Sstevel@tonic-gate { 24557c478bd9Sstevel@tonic-gate aggr_grp_t *grp = port->lp_grp; 24567c478bd9Sstevel@tonic-gate 2457da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD(port->lp_mh)); 2458da14cebeSEric Cheng ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 24597c478bd9Sstevel@tonic-gate 2460ae6aa22aSVenugopal Iyer if (!port->lp_started || port->lp_state != AGGR_PORT_STATE_ATTACHED) 24617c478bd9Sstevel@tonic-gate return; 24627c478bd9Sstevel@tonic-gate 2463da14cebeSEric Cheng mac_multicast_refresh(grp->lg_mh, aggr_port_multicst, port, add); 24647c478bd9Sstevel@tonic-gate } 24657c478bd9Sstevel@tonic-gate 24667c478bd9Sstevel@tonic-gate static int 24677c478bd9Sstevel@tonic-gate aggr_m_multicst(void *arg, boolean_t add, const uint8_t *addrp) 24687c478bd9Sstevel@tonic-gate { 24697c478bd9Sstevel@tonic-gate aggr_grp_t *grp = arg; 24704a6df672SAnil udupa aggr_port_t *port = NULL, *errport = NULL; 2471da14cebeSEric Cheng mac_perim_handle_t mph; 24724a6df672SAnil udupa int err = 0; 24737c478bd9Sstevel@tonic-gate 2474da14cebeSEric Cheng mac_perim_enter_by_mh(grp->lg_mh, &mph); 24757c478bd9Sstevel@tonic-gate for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 2476ae6aa22aSVenugopal Iyer if (port->lp_state != AGGR_PORT_STATE_ATTACHED || 2477ae6aa22aSVenugopal Iyer !port->lp_started) { 24787c478bd9Sstevel@tonic-gate continue; 2479ae6aa22aSVenugopal Iyer } 24804a6df672SAnil udupa err = aggr_port_multicst(port, add, addrp); 24814a6df672SAnil udupa if (err != 0) { 24824a6df672SAnil udupa errport = port; 24834a6df672SAnil udupa break; 24844a6df672SAnil udupa } 24854a6df672SAnil udupa } 24864a6df672SAnil udupa 24874a6df672SAnil udupa /* 24884a6df672SAnil udupa * At least one port caused error return and this error is returned to 24894a6df672SAnil udupa * mac, eventually a NAK would be sent upwards. 
24904a6df672SAnil udupa * Some ports have this multicast address listed now, and some don't. 24914a6df672SAnil udupa * Treat this error as a whole aggr failure not individual port failure. 24924a6df672SAnil udupa * Therefore remove this multicast address from other ports. 24934a6df672SAnil udupa */ 24944a6df672SAnil udupa if ((err != 0) && add) { 24954a6df672SAnil udupa for (port = grp->lg_ports; port != errport; 24964a6df672SAnil udupa port = port->lp_next) { 24974a6df672SAnil udupa if (port->lp_state != AGGR_PORT_STATE_ATTACHED || 24984a6df672SAnil udupa !port->lp_started) { 24994a6df672SAnil udupa continue; 25004a6df672SAnil udupa } 25014a6df672SAnil udupa (void) aggr_port_multicst(port, B_FALSE, addrp); 25024a6df672SAnil udupa } 25037c478bd9Sstevel@tonic-gate } 2504da14cebeSEric Cheng mac_perim_exit(mph); 25057c478bd9Sstevel@tonic-gate return (err); 25067c478bd9Sstevel@tonic-gate } 25077c478bd9Sstevel@tonic-gate 25087c478bd9Sstevel@tonic-gate static int 25097c478bd9Sstevel@tonic-gate aggr_m_unicst(void *arg, const uint8_t *macaddr) 25107c478bd9Sstevel@tonic-gate { 25117c478bd9Sstevel@tonic-gate aggr_grp_t *grp = arg; 2512da14cebeSEric Cheng mac_perim_handle_t mph; 2513da14cebeSEric Cheng int err; 25147c478bd9Sstevel@tonic-gate 2515da14cebeSEric Cheng mac_perim_enter_by_mh(grp->lg_mh, &mph); 2516da14cebeSEric Cheng err = aggr_grp_modify_common(grp, AGGR_MODIFY_MAC, 0, B_TRUE, macaddr, 25177c478bd9Sstevel@tonic-gate 0, 0); 2518da14cebeSEric Cheng mac_perim_exit(mph); 2519da14cebeSEric Cheng return (err); 25207c478bd9Sstevel@tonic-gate } 25217c478bd9Sstevel@tonic-gate 25227c478bd9Sstevel@tonic-gate /* 25237c478bd9Sstevel@tonic-gate * Initialize the capabilities that are advertised for the group 25247c478bd9Sstevel@tonic-gate * according to the capabilities of the constituent ports. 
25257c478bd9Sstevel@tonic-gate */ 25267c478bd9Sstevel@tonic-gate static void 25277c478bd9Sstevel@tonic-gate aggr_grp_capab_set(aggr_grp_t *grp) 25287c478bd9Sstevel@tonic-gate { 2529020da793Sseb uint32_t cksum; 25307c478bd9Sstevel@tonic-gate aggr_port_t *port; 253119c868a0SRoamer mac_capab_lso_t cap_lso; 25327c478bd9Sstevel@tonic-gate 2533da14cebeSEric Cheng ASSERT(grp->lg_mh == NULL); 25347c478bd9Sstevel@tonic-gate ASSERT(grp->lg_ports != NULL); 2535ba2e4443Sseb 2536ba2e4443Sseb grp->lg_hcksum_txflags = (uint32_t)-1; 2537d62bc4baSyz147064 grp->lg_zcopy = B_TRUE; 2538d62bc4baSyz147064 grp->lg_vlan = B_TRUE; 2539ba2e4443Sseb 254019c868a0SRoamer grp->lg_lso = B_TRUE; 254119c868a0SRoamer grp->lg_cap_lso.lso_flags = (t_uscalar_t)-1; 254219c868a0SRoamer grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max = (t_uscalar_t)-1; 254319c868a0SRoamer 25447c478bd9Sstevel@tonic-gate for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 2545020da793Sseb if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &cksum)) 2546020da793Sseb cksum = 0; 2547020da793Sseb grp->lg_hcksum_txflags &= cksum; 25487c478bd9Sstevel@tonic-gate 2549d62bc4baSyz147064 grp->lg_vlan &= 2550d62bc4baSyz147064 !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL); 2551d62bc4baSyz147064 2552d62bc4baSyz147064 grp->lg_zcopy &= 2553d62bc4baSyz147064 !mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL); 255419c868a0SRoamer 255519c868a0SRoamer grp->lg_lso &= 255619c868a0SRoamer mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso); 255719c868a0SRoamer if (grp->lg_lso) { 255819c868a0SRoamer grp->lg_cap_lso.lso_flags &= cap_lso.lso_flags; 255919c868a0SRoamer if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max > 256019c868a0SRoamer cap_lso.lso_basic_tcp_ipv4.lso_max) 256119c868a0SRoamer grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max = 256219c868a0SRoamer cap_lso.lso_basic_tcp_ipv4.lso_max; 256319c868a0SRoamer } 25647c478bd9Sstevel@tonic-gate } 2565ba2e4443Sseb } 2566ba2e4443Sseb 25677c478bd9Sstevel@tonic-gate /* 
2568ba2e4443Sseb * Checks whether the capabilities of the port being added are compatible 25697c478bd9Sstevel@tonic-gate * with the current capabilities of the aggregation. 25707c478bd9Sstevel@tonic-gate */ 25717c478bd9Sstevel@tonic-gate static boolean_t 25727c478bd9Sstevel@tonic-gate aggr_grp_capab_check(aggr_grp_t *grp, aggr_port_t *port) 25737c478bd9Sstevel@tonic-gate { 2574ba2e4443Sseb uint32_t hcksum_txflags; 25757c478bd9Sstevel@tonic-gate 25767c478bd9Sstevel@tonic-gate ASSERT(grp->lg_ports != NULL); 25777c478bd9Sstevel@tonic-gate 2578d62bc4baSyz147064 if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_NATIVEVLAN, NULL)) & 2579d62bc4baSyz147064 grp->lg_vlan) != grp->lg_vlan) { 2580d62bc4baSyz147064 return (B_FALSE); 2581d62bc4baSyz147064 } 2582d62bc4baSyz147064 2583d62bc4baSyz147064 if (((!mac_capab_get(port->lp_mh, MAC_CAPAB_NO_ZCOPY, NULL)) & 2584d62bc4baSyz147064 grp->lg_zcopy) != grp->lg_zcopy) { 2585d62bc4baSyz147064 return (B_FALSE); 2586d62bc4baSyz147064 } 2587d62bc4baSyz147064 2588ba2e4443Sseb if (!mac_capab_get(port->lp_mh, MAC_CAPAB_HCKSUM, &hcksum_txflags)) { 2589ba2e4443Sseb if (grp->lg_hcksum_txflags != 0) 2590ba2e4443Sseb return (B_FALSE); 2591ba2e4443Sseb } else if ((hcksum_txflags & grp->lg_hcksum_txflags) != 2592ba2e4443Sseb grp->lg_hcksum_txflags) { 2593ba2e4443Sseb return (B_FALSE); 2594ba2e4443Sseb } 2595ba2e4443Sseb 259619c868a0SRoamer if (grp->lg_lso) { 259719c868a0SRoamer mac_capab_lso_t cap_lso; 259819c868a0SRoamer 259919c868a0SRoamer if (mac_capab_get(port->lp_mh, MAC_CAPAB_LSO, &cap_lso)) { 260019c868a0SRoamer if ((grp->lg_cap_lso.lso_flags & cap_lso.lso_flags) != 260119c868a0SRoamer grp->lg_cap_lso.lso_flags) 260219c868a0SRoamer return (B_FALSE); 260319c868a0SRoamer if (grp->lg_cap_lso.lso_basic_tcp_ipv4.lso_max > 260419c868a0SRoamer cap_lso.lso_basic_tcp_ipv4.lso_max) 260519c868a0SRoamer return (B_FALSE); 260619c868a0SRoamer } else { 260719c868a0SRoamer return (B_FALSE); 260819c868a0SRoamer } 260919c868a0SRoamer } 261019c868a0SRoamer 
2611ba2e4443Sseb return (B_TRUE); 26127c478bd9Sstevel@tonic-gate } 2613f4420ae7Snd99603 2614f4420ae7Snd99603 /* 2615f4420ae7Snd99603 * Returns the maximum SDU according to the SDU of the constituent ports. 2616f4420ae7Snd99603 */ 2617f4420ae7Snd99603 static uint_t 2618f4420ae7Snd99603 aggr_grp_max_sdu(aggr_grp_t *grp) 2619f4420ae7Snd99603 { 2620f4420ae7Snd99603 uint_t max_sdu = (uint_t)-1; 2621f4420ae7Snd99603 aggr_port_t *port; 2622f4420ae7Snd99603 2623f4420ae7Snd99603 ASSERT(grp->lg_ports != NULL); 2624f4420ae7Snd99603 2625f4420ae7Snd99603 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 2626e7801d59Ssowmini uint_t port_sdu_max; 2627e7801d59Ssowmini 2628e7801d59Ssowmini mac_sdu_get(port->lp_mh, NULL, &port_sdu_max); 2629e7801d59Ssowmini if (max_sdu > port_sdu_max) 2630e7801d59Ssowmini max_sdu = port_sdu_max; 2631f4420ae7Snd99603 } 2632f4420ae7Snd99603 2633f4420ae7Snd99603 return (max_sdu); 2634f4420ae7Snd99603 } 2635f4420ae7Snd99603 2636f4420ae7Snd99603 /* 2637f4420ae7Snd99603 * Checks if the maximum SDU of the specified port is compatible 2638f4420ae7Snd99603 * with the maximum SDU of the specified aggregation group, returns 2639f4420ae7Snd99603 * B_TRUE if it is, B_FALSE otherwise. 2640f4420ae7Snd99603 */ 2641f4420ae7Snd99603 static boolean_t 2642f4420ae7Snd99603 aggr_grp_sdu_check(aggr_grp_t *grp, aggr_port_t *port) 2643f4420ae7Snd99603 { 2644e7801d59Ssowmini uint_t port_sdu_max; 2645f4420ae7Snd99603 2646e7801d59Ssowmini mac_sdu_get(port->lp_mh, NULL, &port_sdu_max); 2647e7801d59Ssowmini return (port_sdu_max >= grp->lg_max_sdu); 2648f4420ae7Snd99603 } 2649d62bc4baSyz147064 2650d62bc4baSyz147064 /* 2651d62bc4baSyz147064 * Returns the maximum margin according to the margin of the constituent ports. 
2652d62bc4baSyz147064 */ 2653d62bc4baSyz147064 static uint32_t 2654d62bc4baSyz147064 aggr_grp_max_margin(aggr_grp_t *grp) 2655d62bc4baSyz147064 { 2656d62bc4baSyz147064 uint32_t margin = UINT32_MAX; 2657d62bc4baSyz147064 aggr_port_t *port; 2658d62bc4baSyz147064 2659da14cebeSEric Cheng ASSERT(grp->lg_mh == NULL); 2660d62bc4baSyz147064 ASSERT(grp->lg_ports != NULL); 2661d62bc4baSyz147064 2662d62bc4baSyz147064 for (port = grp->lg_ports; port != NULL; port = port->lp_next) { 2663d62bc4baSyz147064 if (margin > port->lp_margin) 2664d62bc4baSyz147064 margin = port->lp_margin; 2665d62bc4baSyz147064 } 2666d62bc4baSyz147064 2667d62bc4baSyz147064 grp->lg_margin = margin; 2668d62bc4baSyz147064 return (margin); 2669d62bc4baSyz147064 } 2670d62bc4baSyz147064 2671d62bc4baSyz147064 /* 2672d62bc4baSyz147064 * Checks if the maximum margin of the specified port is compatible 2673d62bc4baSyz147064 * with the maximum margin of the specified aggregation group, returns 2674d62bc4baSyz147064 * B_TRUE if it is, B_FALSE otherwise. 2675d62bc4baSyz147064 */ 2676d62bc4baSyz147064 static boolean_t 2677d62bc4baSyz147064 aggr_grp_margin_check(aggr_grp_t *grp, aggr_port_t *port) 2678d62bc4baSyz147064 { 2679d62bc4baSyz147064 if (port->lp_margin >= grp->lg_margin) 2680d62bc4baSyz147064 return (B_TRUE); 2681d62bc4baSyz147064 2682d62bc4baSyz147064 /* 2683d62bc4baSyz147064 * See whether the current margin value is allowed to be changed to 2684d62bc4baSyz147064 * the new value. 
2685d62bc4baSyz147064 */ 2686d62bc4baSyz147064 if (!mac_margin_update(grp->lg_mh, port->lp_margin)) 2687d62bc4baSyz147064 return (B_FALSE); 2688d62bc4baSyz147064 2689d62bc4baSyz147064 grp->lg_margin = port->lp_margin; 2690d62bc4baSyz147064 return (B_TRUE); 2691d62bc4baSyz147064 } 2692986cab2cSGirish Moodalbail 2693986cab2cSGirish Moodalbail /* 2694986cab2cSGirish Moodalbail * Set MTU on individual ports of an aggregation group 2695986cab2cSGirish Moodalbail */ 2696986cab2cSGirish Moodalbail static int 2697986cab2cSGirish Moodalbail aggr_set_port_sdu(aggr_grp_t *grp, aggr_port_t *port, uint32_t sdu, 2698986cab2cSGirish Moodalbail uint32_t *old_mtu) 2699986cab2cSGirish Moodalbail { 2700986cab2cSGirish Moodalbail boolean_t removed = B_FALSE; 2701986cab2cSGirish Moodalbail mac_perim_handle_t mph; 2702986cab2cSGirish Moodalbail mac_diag_t diag; 2703986cab2cSGirish Moodalbail int err, rv, retry = 0; 2704986cab2cSGirish Moodalbail 2705986cab2cSGirish Moodalbail if (port->lp_mah != NULL) { 2706986cab2cSGirish Moodalbail (void) mac_unicast_remove(port->lp_mch, port->lp_mah); 2707986cab2cSGirish Moodalbail port->lp_mah = NULL; 2708986cab2cSGirish Moodalbail removed = B_TRUE; 2709986cab2cSGirish Moodalbail } 2710986cab2cSGirish Moodalbail err = mac_set_mtu(port->lp_mh, sdu, old_mtu); 2711986cab2cSGirish Moodalbail try_again: 27124c91d6c6SVenugopal Iyer if (removed && (rv = mac_unicast_add(port->lp_mch, NULL, 27134c91d6c6SVenugopal Iyer MAC_UNICAST_PRIMARY | MAC_UNICAST_DISABLE_TX_VID_CHECK, 27144c91d6c6SVenugopal Iyer &port->lp_mah, 0, &diag)) != 0) { 2715986cab2cSGirish Moodalbail /* 2716986cab2cSGirish Moodalbail * following is a workaround for a bug in 'bge' driver. 2717986cab2cSGirish Moodalbail * See CR 6794654 for more information and this work around 2718986cab2cSGirish Moodalbail * will be removed once the CR is fixed. 
2719986cab2cSGirish Moodalbail */ 2720986cab2cSGirish Moodalbail if (rv == EIO && retry++ < 3) { 2721986cab2cSGirish Moodalbail delay(2 * hz); 2722986cab2cSGirish Moodalbail goto try_again; 2723986cab2cSGirish Moodalbail } 2724986cab2cSGirish Moodalbail /* 27254c91d6c6SVenugopal Iyer * if mac_unicast_add() failed while setting the MTU, 2726986cab2cSGirish Moodalbail * detach the port from the group. 2727986cab2cSGirish Moodalbail */ 2728986cab2cSGirish Moodalbail mac_perim_enter_by_mh(port->lp_mh, &mph); 2729986cab2cSGirish Moodalbail (void) aggr_grp_detach_port(grp, port); 2730986cab2cSGirish Moodalbail mac_perim_exit(mph); 2731986cab2cSGirish Moodalbail cmn_err(CE_WARN, "Unable to restart the port %s while " 2732986cab2cSGirish Moodalbail "setting MTU. Detaching the port from the aggregation.", 2733986cab2cSGirish Moodalbail mac_client_name(port->lp_mch)); 2734986cab2cSGirish Moodalbail } 2735986cab2cSGirish Moodalbail return (err); 2736986cab2cSGirish Moodalbail } 2737986cab2cSGirish Moodalbail 2738986cab2cSGirish Moodalbail static int 2739986cab2cSGirish Moodalbail aggr_sdu_update(aggr_grp_t *grp, uint32_t sdu) 2740986cab2cSGirish Moodalbail { 2741986cab2cSGirish Moodalbail int err = 0, i, rv; 2742986cab2cSGirish Moodalbail aggr_port_t *port; 2743986cab2cSGirish Moodalbail uint32_t *mtu; 2744986cab2cSGirish Moodalbail 2745986cab2cSGirish Moodalbail ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 2746986cab2cSGirish Moodalbail 2747986cab2cSGirish Moodalbail /* 2748986cab2cSGirish Moodalbail * If the MTU being set is equal to aggr group's maximum 2749986cab2cSGirish Moodalbail * allowable value, then there is nothing to change 2750986cab2cSGirish Moodalbail */ 2751986cab2cSGirish Moodalbail if (sdu == grp->lg_max_sdu) 2752986cab2cSGirish Moodalbail return (0); 2753986cab2cSGirish Moodalbail 2754986cab2cSGirish Moodalbail /* 0 is aggr group's min sdu */ 2755986cab2cSGirish Moodalbail if (sdu == 0) 2756986cab2cSGirish Moodalbail return (EINVAL); 2757986cab2cSGirish Moodalbail 
2758986cab2cSGirish Moodalbail mtu = kmem_alloc(sizeof (uint32_t) * grp->lg_nports, KM_SLEEP); 2759986cab2cSGirish Moodalbail for (port = grp->lg_ports, i = 0; port != NULL && err == 0; 2760986cab2cSGirish Moodalbail port = port->lp_next, i++) { 2761986cab2cSGirish Moodalbail err = aggr_set_port_sdu(grp, port, sdu, mtu + i); 2762986cab2cSGirish Moodalbail } 2763986cab2cSGirish Moodalbail if (err != 0) { 2764986cab2cSGirish Moodalbail /* recover from error: reset the mtus of the ports */ 2765986cab2cSGirish Moodalbail aggr_port_t *tmp; 2766986cab2cSGirish Moodalbail 2767986cab2cSGirish Moodalbail for (tmp = grp->lg_ports, i = 0; tmp != port; 2768986cab2cSGirish Moodalbail tmp = tmp->lp_next, i++) { 2769986cab2cSGirish Moodalbail (void) aggr_set_port_sdu(grp, tmp, *(mtu + i), NULL); 2770986cab2cSGirish Moodalbail } 2771986cab2cSGirish Moodalbail goto bail; 2772986cab2cSGirish Moodalbail } 2773986cab2cSGirish Moodalbail grp->lg_max_sdu = aggr_grp_max_sdu(grp); 2774986cab2cSGirish Moodalbail rv = mac_maxsdu_update(grp->lg_mh, grp->lg_max_sdu); 2775986cab2cSGirish Moodalbail ASSERT(rv == 0); 2776986cab2cSGirish Moodalbail bail: 2777986cab2cSGirish Moodalbail kmem_free(mtu, sizeof (uint32_t) * grp->lg_nports); 2778986cab2cSGirish Moodalbail return (err); 2779986cab2cSGirish Moodalbail } 2780986cab2cSGirish Moodalbail 2781986cab2cSGirish Moodalbail /* 2782986cab2cSGirish Moodalbail * Callback functions for set/get of properties 2783986cab2cSGirish Moodalbail */ 2784986cab2cSGirish Moodalbail /*ARGSUSED*/ 2785986cab2cSGirish Moodalbail static int 2786986cab2cSGirish Moodalbail aggr_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, 2787986cab2cSGirish Moodalbail uint_t pr_valsize, const void *pr_val) 2788986cab2cSGirish Moodalbail { 2789986cab2cSGirish Moodalbail int err = ENOTSUP; 2790986cab2cSGirish Moodalbail aggr_grp_t *grp = m_driver; 2791986cab2cSGirish Moodalbail 2792986cab2cSGirish Moodalbail switch (pr_num) { 2793986cab2cSGirish Moodalbail case 
MAC_PROP_MTU: { 2794986cab2cSGirish Moodalbail uint32_t mtu; 2795986cab2cSGirish Moodalbail 2796986cab2cSGirish Moodalbail if (pr_valsize < sizeof (mtu)) { 2797986cab2cSGirish Moodalbail err = EINVAL; 2798986cab2cSGirish Moodalbail break; 2799986cab2cSGirish Moodalbail } 2800986cab2cSGirish Moodalbail bcopy(pr_val, &mtu, sizeof (mtu)); 2801986cab2cSGirish Moodalbail err = aggr_sdu_update(grp, mtu); 2802986cab2cSGirish Moodalbail break; 2803986cab2cSGirish Moodalbail } 2804986cab2cSGirish Moodalbail default: 2805986cab2cSGirish Moodalbail break; 2806986cab2cSGirish Moodalbail } 2807986cab2cSGirish Moodalbail return (err); 2808986cab2cSGirish Moodalbail } 2809986cab2cSGirish Moodalbail 28100591ddd0SPrakash Jalan typedef struct rboundary { 28110591ddd0SPrakash Jalan uint32_t bval; 28120591ddd0SPrakash Jalan int btype; 28130591ddd0SPrakash Jalan } rboundary_t; 28140591ddd0SPrakash Jalan 28150591ddd0SPrakash Jalan /* 28160591ddd0SPrakash Jalan * This function finds the intersection of mtu ranges stored in arrays - 28170591ddd0SPrakash Jalan * mrange[0] ... mrange[mcount -1]. It returns the intersection in rval. 28180591ddd0SPrakash Jalan * Individual arrays are assumed to contain non-overlapping ranges. 28190591ddd0SPrakash Jalan * Algorithm: 28200591ddd0SPrakash Jalan * A range has two boundaries - min and max. We scan all arrays and store 28210591ddd0SPrakash Jalan * each boundary as a separate element in a temporary array. We also store 28220591ddd0SPrakash Jalan * the boundary types, min or max, as +1 or -1 respectively in the temporary 28230591ddd0SPrakash Jalan * array. Then we sort the temporary array in ascending order. We scan the 28240591ddd0SPrakash Jalan * sorted array from lower to higher values and keep a cumulative sum of 28250591ddd0SPrakash Jalan * boundary types. 
Element in the temporary array for which the sum reaches 28260591ddd0SPrakash Jalan * mcount is a min boundary of a range in the result and next element will be 28270591ddd0SPrakash Jalan * max boundary. 28280591ddd0SPrakash Jalan * 28290591ddd0SPrakash Jalan * Example for mcount = 3, 28300591ddd0SPrakash Jalan * 28310591ddd0SPrakash Jalan * ----|_________|-------|_______|----|__|------ mrange[0] 28320591ddd0SPrakash Jalan * 28330591ddd0SPrakash Jalan * -------|________|--|____________|-----|___|-- mrange[1] 28340591ddd0SPrakash Jalan * 28350591ddd0SPrakash Jalan * --------|________________|-------|____|------ mrange[2] 28360591ddd0SPrakash Jalan * 28370591ddd0SPrakash Jalan * 3 2 1 28380591ddd0SPrakash Jalan * \|/ 28390591ddd0SPrakash Jalan * 1 23 2 1 2 3 2 1 01 2 V 0 <- the sum 28400591ddd0SPrakash Jalan * ----|--||-----|-|--|--|--|----|-||-|--|---|-- sorted array 28410591ddd0SPrakash Jalan * 28420591ddd0SPrakash Jalan * same min and max 28430591ddd0SPrakash Jalan * V 28440591ddd0SPrakash Jalan * --------|_____|-------|__|------------|------ intersecting ranges 28450591ddd0SPrakash Jalan */ 28460591ddd0SPrakash Jalan void 28470591ddd0SPrakash Jalan aggr_mtu_range_intersection(mac_propval_range_t **mrange, int mcount, 28480591ddd0SPrakash Jalan mac_propval_uint32_range_t **prval, int *prmaxcnt, int *prcount) 2849f0f2c3a5SGirish Moodalbail { 28500591ddd0SPrakash Jalan mac_propval_uint32_range_t *rval, *ur; 28510591ddd0SPrakash Jalan int rmaxcnt, rcount; 28520591ddd0SPrakash Jalan size_t sz_range32; 28530591ddd0SPrakash Jalan rboundary_t *ta; /* temporary array */ 28540591ddd0SPrakash Jalan rboundary_t temp; 28550591ddd0SPrakash Jalan boolean_t range_started = B_FALSE; 28560591ddd0SPrakash Jalan int i, j, m, sum; 28570591ddd0SPrakash Jalan 28580591ddd0SPrakash Jalan sz_range32 = sizeof (mac_propval_uint32_range_t); 28590591ddd0SPrakash Jalan 28600591ddd0SPrakash Jalan for (i = 0, rmaxcnt = 0; i < mcount; i++) 28610591ddd0SPrakash Jalan rmaxcnt += 
mrange[i]->mpr_count; 28620591ddd0SPrakash Jalan 28630591ddd0SPrakash Jalan /* Allocate enough space to store the results */ 28640591ddd0SPrakash Jalan rval = kmem_alloc(rmaxcnt * sz_range32, KM_SLEEP); 28650591ddd0SPrakash Jalan 28660591ddd0SPrakash Jalan /* Number of boundaries are twice as many as ranges */ 28670591ddd0SPrakash Jalan ta = kmem_alloc(2 * rmaxcnt * sizeof (rboundary_t), KM_SLEEP); 28680591ddd0SPrakash Jalan 28690591ddd0SPrakash Jalan for (i = 0, m = 0; i < mcount; i++) { 28700591ddd0SPrakash Jalan ur = &(mrange[i]->mpr_range_uint32[0]); 28710591ddd0SPrakash Jalan for (j = 0; j < mrange[i]->mpr_count; j++) { 28720591ddd0SPrakash Jalan ta[m].bval = ur[j].mpur_min; 28730591ddd0SPrakash Jalan ta[m++].btype = 1; 28740591ddd0SPrakash Jalan ta[m].bval = ur[j].mpur_max; 28750591ddd0SPrakash Jalan ta[m++].btype = -1; 28760591ddd0SPrakash Jalan } 28770591ddd0SPrakash Jalan } 28780591ddd0SPrakash Jalan 28790591ddd0SPrakash Jalan /* 28800591ddd0SPrakash Jalan * Sort the temporary array in ascending order of bval; 28810591ddd0SPrakash Jalan * if boundary values are same then sort on btype. 
28820591ddd0SPrakash Jalan */ 28830591ddd0SPrakash Jalan for (i = 0; i < m-1; i++) { 28840591ddd0SPrakash Jalan for (j = i+1; j < m; j++) { 28850591ddd0SPrakash Jalan if ((ta[i].bval > ta[j].bval) || 28860591ddd0SPrakash Jalan ((ta[i].bval == ta[j].bval) && 28870591ddd0SPrakash Jalan (ta[i].btype < ta[j].btype))) { 28880591ddd0SPrakash Jalan temp = ta[i]; 28890591ddd0SPrakash Jalan ta[i] = ta[j]; 28900591ddd0SPrakash Jalan ta[j] = temp; 28910591ddd0SPrakash Jalan } 28920591ddd0SPrakash Jalan } 28930591ddd0SPrakash Jalan } 28940591ddd0SPrakash Jalan 28950591ddd0SPrakash Jalan /* Walk through temporary array to find all ranges in the results */ 28960591ddd0SPrakash Jalan for (i = 0, sum = 0, rcount = 0; i < m; i++) { 28970591ddd0SPrakash Jalan sum += ta[i].btype; 28980591ddd0SPrakash Jalan if (sum == mcount) { 28990591ddd0SPrakash Jalan rval[rcount].mpur_min = ta[i].bval; 29000591ddd0SPrakash Jalan range_started = B_TRUE; 29010591ddd0SPrakash Jalan } else if (sum < mcount && range_started) { 29020591ddd0SPrakash Jalan rval[rcount++].mpur_max = ta[i].bval; 29030591ddd0SPrakash Jalan range_started = B_FALSE; 29040591ddd0SPrakash Jalan } 29050591ddd0SPrakash Jalan } 29060591ddd0SPrakash Jalan 29070591ddd0SPrakash Jalan *prval = rval; 29080591ddd0SPrakash Jalan *prmaxcnt = rmaxcnt; 29090591ddd0SPrakash Jalan *prcount = rcount; 291084191983SPrakash Jalan 291184191983SPrakash Jalan kmem_free(ta, 2 * rmaxcnt * sizeof (rboundary_t)); 29120591ddd0SPrakash Jalan } 29130591ddd0SPrakash Jalan 29140591ddd0SPrakash Jalan /* 29150591ddd0SPrakash Jalan * Returns the mtu ranges which could be supported by aggr group. 29160591ddd0SPrakash Jalan * prmaxcnt returns the size of the buffer prval, prcount returns 29170591ddd0SPrakash Jalan * the number of valid entries in prval. Caller is responsible 29180591ddd0SPrakash Jalan * for freeing up prval. 
29190591ddd0SPrakash Jalan */ 29200591ddd0SPrakash Jalan int 29210591ddd0SPrakash Jalan aggr_grp_possible_mtu_range(aggr_grp_t *grp, mac_propval_uint32_range_t **prval, 29220591ddd0SPrakash Jalan int *prmaxcnt, int *prcount) 29230591ddd0SPrakash Jalan { 29240591ddd0SPrakash Jalan mac_propval_range_t **vals; 2925f0f2c3a5SGirish Moodalbail aggr_port_t *port; 2926f0f2c3a5SGirish Moodalbail mac_perim_handle_t mph; 29270591ddd0SPrakash Jalan uint_t i, numr; 2928f0f2c3a5SGirish Moodalbail int err = 0; 29290591ddd0SPrakash Jalan size_t sz_propval, sz_range32; 29300591ddd0SPrakash Jalan size_t size; 29310591ddd0SPrakash Jalan 29320591ddd0SPrakash Jalan sz_propval = sizeof (mac_propval_range_t); 29330591ddd0SPrakash Jalan sz_range32 = sizeof (mac_propval_uint32_range_t); 2934f0f2c3a5SGirish Moodalbail 2935f0f2c3a5SGirish Moodalbail ASSERT(MAC_PERIM_HELD(grp->lg_mh)); 2936f0f2c3a5SGirish Moodalbail 29370591ddd0SPrakash Jalan vals = kmem_zalloc(sizeof (mac_propval_range_t *) * grp->lg_nports, 2938f0f2c3a5SGirish Moodalbail KM_SLEEP); 2939f0f2c3a5SGirish Moodalbail 2940f0f2c3a5SGirish Moodalbail for (port = grp->lg_ports, i = 0; port != NULL; 2941f0f2c3a5SGirish Moodalbail port = port->lp_next, i++) { 29420591ddd0SPrakash Jalan 29430591ddd0SPrakash Jalan size = sz_propval; 29440591ddd0SPrakash Jalan vals[i] = kmem_alloc(size, KM_SLEEP); 29450591ddd0SPrakash Jalan vals[i]->mpr_count = 1; 29460591ddd0SPrakash Jalan 2947f0f2c3a5SGirish Moodalbail mac_perim_enter_by_mh(port->lp_mh, &mph); 29480591ddd0SPrakash Jalan 29490dc2366fSVenugopal Iyer err = mac_prop_info(port->lp_mh, MAC_PROP_MTU, NULL, 29500591ddd0SPrakash Jalan NULL, 0, vals[i], NULL); 29510591ddd0SPrakash Jalan if (err == ENOSPC) { 29520591ddd0SPrakash Jalan /* 29530591ddd0SPrakash Jalan * Not enough space to hold all ranges. 29540591ddd0SPrakash Jalan * Allocate extra space as indicated and retry. 
29550591ddd0SPrakash Jalan */ 29560591ddd0SPrakash Jalan numr = vals[i]->mpr_count; 29570591ddd0SPrakash Jalan kmem_free(vals[i], sz_propval); 29580591ddd0SPrakash Jalan size = sz_propval + (numr - 1) * sz_range32; 29590591ddd0SPrakash Jalan vals[i] = kmem_alloc(size, KM_SLEEP); 29600591ddd0SPrakash Jalan vals[i]->mpr_count = numr; 29610591ddd0SPrakash Jalan err = mac_prop_info(port->lp_mh, MAC_PROP_MTU, NULL, 29620591ddd0SPrakash Jalan NULL, 0, vals[i], NULL); 29630591ddd0SPrakash Jalan ASSERT(err != ENOSPC); 29640591ddd0SPrakash Jalan } 2965f0f2c3a5SGirish Moodalbail mac_perim_exit(mph); 29660591ddd0SPrakash Jalan if (err != 0) { 29670591ddd0SPrakash Jalan kmem_free(vals[i], size); 29680591ddd0SPrakash Jalan vals[i] = NULL; 2969f0f2c3a5SGirish Moodalbail break; 2970f0f2c3a5SGirish Moodalbail } 29710591ddd0SPrakash Jalan } 29720dc2366fSVenugopal Iyer 2973f0f2c3a5SGirish Moodalbail /* 2974f0f2c3a5SGirish Moodalbail * if any of the underlying ports does not support changing MTU then 2975f0f2c3a5SGirish Moodalbail * just return ENOTSUP 2976f0f2c3a5SGirish Moodalbail */ 2977f0f2c3a5SGirish Moodalbail if (port != NULL) { 2978f0f2c3a5SGirish Moodalbail ASSERT(err != 0); 2979f0f2c3a5SGirish Moodalbail goto done; 2980f0f2c3a5SGirish Moodalbail } 29810dc2366fSVenugopal Iyer 29820591ddd0SPrakash Jalan aggr_mtu_range_intersection(vals, grp->lg_nports, prval, prmaxcnt, 29830591ddd0SPrakash Jalan prcount); 29840dc2366fSVenugopal Iyer 29850591ddd0SPrakash Jalan done: 29860591ddd0SPrakash Jalan for (i = 0; i < grp->lg_nports; i++) { 29870591ddd0SPrakash Jalan if (vals[i] != NULL) { 29880591ddd0SPrakash Jalan numr = vals[i]->mpr_count; 29890591ddd0SPrakash Jalan size = sz_propval + (numr - 1) * sz_range32; 29900591ddd0SPrakash Jalan kmem_free(vals[i], size); 29910591ddd0SPrakash Jalan } 29920591ddd0SPrakash Jalan } 29930591ddd0SPrakash Jalan 29940591ddd0SPrakash Jalan kmem_free(vals, sizeof (mac_propval_range_t *) * grp->lg_nports); 2995f0f2c3a5SGirish Moodalbail return (err); 
2996f0f2c3a5SGirish Moodalbail } 2997f0f2c3a5SGirish Moodalbail 29980dc2366fSVenugopal Iyer static void 29990dc2366fSVenugopal Iyer aggr_m_propinfo(void *m_driver, const char *pr_name, mac_prop_id_t pr_num, 30000dc2366fSVenugopal Iyer mac_prop_info_handle_t prh) 3001986cab2cSGirish Moodalbail { 3002f0f2c3a5SGirish Moodalbail aggr_grp_t *grp = m_driver; 30030591ddd0SPrakash Jalan mac_propval_uint32_range_t *rval = NULL; 30040591ddd0SPrakash Jalan int i, rcount, rmaxcnt; 30050591ddd0SPrakash Jalan int err = 0; 3006f0f2c3a5SGirish Moodalbail 30070dc2366fSVenugopal Iyer _NOTE(ARGUNUSED(pr_name)); 30080dc2366fSVenugopal Iyer 3009f0f2c3a5SGirish Moodalbail switch (pr_num) { 30100591ddd0SPrakash Jalan case MAC_PROP_MTU: 30110dc2366fSVenugopal Iyer 30120591ddd0SPrakash Jalan err = aggr_grp_possible_mtu_range(grp, &rval, &rmaxcnt, 30130591ddd0SPrakash Jalan &rcount); 30140591ddd0SPrakash Jalan if (err != 0) { 30150591ddd0SPrakash Jalan ASSERT(rval == NULL); 30160dc2366fSVenugopal Iyer return; 3017f0f2c3a5SGirish Moodalbail } 30180591ddd0SPrakash Jalan for (i = 0; i < rcount; i++) { 30190591ddd0SPrakash Jalan mac_prop_info_set_range_uint32(prh, 30200591ddd0SPrakash Jalan rval[i].mpur_min, rval[i].mpur_max); 30210591ddd0SPrakash Jalan } 30220591ddd0SPrakash Jalan kmem_free(rval, sizeof (mac_propval_uint32_range_t) * rmaxcnt); 30230591ddd0SPrakash Jalan break; 30240dc2366fSVenugopal Iyer } 3025986cab2cSGirish Moodalbail } 3026