1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * Copyright 2012 OmniTI Computer Consulting, Inc All rights reserved. 25 * Copyright 2018 Joyent, Inc. 26 * Copyright 2020 RackTop Systems, Inc. 27 */ 28 29 #ifndef _SYS_AGGR_IMPL_H 30 #define _SYS_AGGR_IMPL_H 31 32 #include <sys/types.h> 33 #include <sys/cred.h> 34 #include <sys/mac_ether.h> 35 #include <sys/mac_provider.h> 36 #include <sys/mac_client.h> 37 #include <sys/mac_client_priv.h> 38 #include <sys/aggr_lacp.h> 39 40 #ifdef __cplusplus 41 extern "C" { 42 #endif 43 44 #ifdef _KERNEL 45 46 #define AGGR_MINOR_CTL 1 /* control interface minor */ 47 48 /* flags for aggr_grp_modify() */ 49 #define AGGR_MODIFY_POLICY 0x01 50 #define AGGR_MODIFY_MAC 0x02 51 #define AGGR_MODIFY_LACP_MODE 0x04 52 #define AGGR_MODIFY_LACP_TIMER 0x08 53 54 /* 55 * Possible value of aggr_rseudo_rx_ring_t.arr_flags. Set when the ring entry 56 * in the pseudo RX group is used. 57 */ 58 #define MAC_PSEUDO_RING_INUSE 0x01 59 60 #define MAX_GROUPS_PER_PORT 128 61 62 /* 63 * VLAN filters placed on the Rx pseudo group. 64 */ 65 typedef struct aggr_vlan { 66 list_node_t av_link; 67 uint16_t av_vid; /* VLAN ID */ 68 uint_t av_refs; /* num aggr clients using this VID */ 69 } aggr_vlan_t; 70 71 typedef struct aggr_unicst_addr_s { 72 uint8_t aua_addr[ETHERADDRL]; 73 struct aggr_unicst_addr_s *aua_next; 74 } aggr_unicst_addr_t; 75 76 typedef struct aggr_pseudo_rx_ring_s { 77 mac_ring_handle_t arr_rh; /* set by aggr_fill_ring() */ 78 struct aggr_port_s *arr_port; 79 struct aggr_pseudo_rx_group_s *arr_grp; 80 mac_ring_handle_t arr_hw_rh; 81 uint_t arr_flags; 82 uint64_t arr_gen; 83 } aggr_pseudo_rx_ring_t; 84 85 /* 86 * An aggr pseudo group abstracts the underlying ports' HW groups. For 87 * example, if each port has 8 groups (mac_group_t), then the aggr 88 * will create 8 pseudo groups. Each pseudo group represents a 89 * collection of HW groups: one group from each port. If you have 90 * three ports then the pseudo group stands in for three HW groups. 91 */ 92 typedef struct aggr_pseudo_rx_group_s { 93 uint_t arg_index; 94 struct aggr_grp_s *arg_grp; /* filled in by aggr_fill_group() */ 95 mac_group_handle_t arg_gh; /* filled in by aggr_fill_group() */ 96 aggr_unicst_addr_t *arg_macaddr; 97 aggr_pseudo_rx_ring_t arg_rings[MAX_RINGS_PER_GROUP]; 98 uint_t arg_ring_cnt; 99 uint_t arg_untagged; /* num clients untagged */ 100 list_t arg_vlans; /* VLANs on this group */ 101 } aggr_pseudo_rx_group_t; 102 103 typedef struct aggr_pseudo_tx_ring_s { 104 mac_ring_handle_t atr_rh; /* filled in by aggr_fill_ring() */ 105 struct aggr_port_s *atr_port; 106 mac_ring_handle_t atr_hw_rh; 107 uint_t atr_flags; 108 } aggr_pseudo_tx_ring_t; 109 110 typedef struct aggr_pseudo_tx_group_s { 111 mac_group_handle_t atg_gh; /* filled in by aggr_fill_group() */ 112 uint_t atg_ring_cnt; 113 aggr_pseudo_tx_ring_t atg_rings[MAX_RINGS_PER_GROUP]; 114 } aggr_pseudo_tx_group_t; 115 116 /* 117 * A link aggregation MAC port. 118 * Note that lp_next is protected by the lg_lock of the group the 119 * port is part of. 120 */ 121 typedef struct aggr_port_s { 122 struct aggr_port_s *lp_next; 123 struct aggr_grp_s *lp_grp; /* back ptr to group */ 124 datalink_id_t lp_linkid; 125 uint16_t lp_portid; 126 uint8_t lp_addr[ETHERADDRL]; /* port MAC address */ 127 uint32_t lp_refs; /* refcount */ 128 aggr_port_state_t lp_state; 129 uint32_t lp_started : 1, 130 lp_tx_enabled : 1, 131 lp_collector_enabled : 1, 132 lp_promisc_on : 1, 133 lp_no_link_update : 1, 134 lp_tx_grp_added : 1, 135 lp_closing : 1, 136 lp_pad_bits : 25; 137 mac_handle_t lp_mh; 138 139 mac_client_handle_t lp_mch; 140 141 const mac_info_t *lp_mip; 142 mac_notify_handle_t lp_mnh; 143 uint_t lp_tx_idx; /* idx in group's tx array */ 144 uint64_t lp_ifspeed; 145 link_state_t lp_link_state; 146 link_duplex_t lp_link_duplex; 147 uint64_t lp_stat[MAC_NSTAT]; 148 uint64_t lp_ether_stat[ETHER_NSTAT]; 149 aggr_lacp_port_t lp_lacp; /* LACP state */ 150 lacp_stats_t lp_lacp_stats; 151 uint32_t lp_margin; 152 153 mac_unicast_handle_t lp_mah; 154 155 /* List of non-primary addresses that requires promiscous mode set */ 156 aggr_unicst_addr_t *lp_prom_addr; 157 158 /* 159 * References to the underlying HW Rx groups of this port. 160 * Used by aggr to program HW classification for the pseudo 161 * groups. 162 */ 163 mac_group_handle_t lp_hwghs[MAX_GROUPS_PER_PORT]; 164 165 uint_t lp_tx_ring_alloc; 166 uint_t lp_tx_ring_cnt; 167 /* handles of the underlying HW TX rings */ 168 mac_ring_handle_t *lp_tx_rings; 169 /* 170 * Handles of the pseudo TX rings. Each of them maps to 171 * corresponding hardware TX ring in lp_tx_rings[]. A 172 * pseudo TX ring is presented to aggr primary mac 173 * client even when underlying NIC has no TX ring. 174 */ 175 mac_ring_handle_t *lp_pseudo_tx_rings; 176 void *lp_tx_notify_mh; 177 } aggr_port_t; 178 179 /* 180 * A link aggregation group. 181 * 182 * The following per-group flags are defined: 183 * 184 * - lg_addr_fixed: set when the MAC address has been explicitely set 185 * when the group was created, or by a m_unicst_set() request. 186 * If this flag is not set, the MAC address of the group will be 187 * set to the first port that is added to the group. 188 * 189 * - lg_add_set: used only when lg_addr_fixed is not set. Captures whether 190 * the MAC address was initialized according to the members of the group. 191 * When set, the lg_port field points to the port from which the 192 * MAC address was initialized. 193 * 194 */ 195 typedef struct aggr_grp_s { 196 datalink_id_t lg_linkid; 197 uint16_t lg_key; /* key (group port number) */ 198 uint32_t lg_refs; /* refcount */ 199 uint16_t lg_nports; /* number of MAC ports */ 200 uint16_t lg_nports_high; /* highest no. of MAC ports */ 201 uint8_t lg_addr[ETHERADDRL]; /* group MAC address */ 202 uint16_t 203 lg_closing : 1, 204 lg_addr_fixed : 1, /* fixed MAC address? */ 205 lg_started : 1, /* group started? */ 206 lg_promisc : 1, /* in promiscuous mode? */ 207 lg_zcopy : 1, 208 lg_vlan : 1, 209 lg_force : 1, 210 lg_lso : 1, 211 lg_pad_bits : 8; 212 aggr_port_t *lg_ports; /* list of configured ports */ 213 aggr_port_t *lg_mac_addr_port; /* using address of this port */ 214 mac_handle_t lg_mh; 215 zoneid_t lg_zoneid; 216 uint_t lg_nattached_ports; 217 krwlock_t lg_tx_lock; 218 uint_t lg_ntx_ports; 219 aggr_port_t **lg_tx_ports; /* array of tx ports */ 220 uint_t lg_tx_ports_size; /* size of lg_tx_ports */ 221 uint32_t lg_tx_policy; /* outbound policy */ 222 uint8_t lg_mac_tx_policy; 223 link_state_t lg_link_state; 224 225 226 /* 227 * The lg_stat_lock must be held when accessing these fields. 228 */ 229 kmutex_t lg_stat_lock; 230 uint64_t lg_ifspeed; 231 link_duplex_t lg_link_duplex; 232 uint64_t lg_stat[MAC_NSTAT]; 233 uint64_t lg_ether_stat[ETHER_NSTAT]; 234 235 aggr_lacp_mode_t lg_lacp_mode; /* off, active, or passive */ 236 Agg_t aggr; /* 802.3ad data */ 237 uint32_t lg_hcksum_txflags; 238 uint_t lg_max_sdu; 239 uint32_t lg_margin; 240 mac_capab_lso_t lg_cap_lso; 241 242 /* 243 * The following fields are used by the LACP packets processing. 244 * Specifically, as the LACP packets processing is not performance 245 * critical, all LACP packets will be handled by a dedicated thread 246 * instead of in the mac_rx() call. This is to avoid the dead lock 247 * with mac_unicast_remove(), which holding the mac perimeter of the 248 * aggr, and wait for the mr_refcnt of the RX ring to drop to zero. 249 */ 250 kmutex_t lg_lacp_lock; 251 kcondvar_t lg_lacp_cv; 252 mblk_t *lg_lacp_head; 253 mblk_t *lg_lacp_tail; 254 kthread_t *lg_lacp_rx_thread; 255 boolean_t lg_lacp_done; 256 257 uint_t lg_rx_group_count; 258 aggr_pseudo_rx_group_t lg_rx_groups[MAX_GROUPS_PER_PORT]; 259 260 aggr_pseudo_tx_group_t lg_tx_group; 261 262 kmutex_t lg_tx_flowctl_lock; 263 kcondvar_t lg_tx_flowctl_cv; 264 uint_t lg_tx_blocked_cnt; 265 mac_ring_handle_t *lg_tx_blocked_rings; 266 kthread_t *lg_tx_notify_thread; 267 boolean_t lg_tx_notify_done; 268 269 /* 270 * The following fields are used by aggr to wait for all the 271 * aggr_port_notify_cb() and aggr_port_timer_thread() to finish 272 * before it calls mac_unregister() when the aggr is deleted. 273 */ 274 kmutex_t lg_port_lock; 275 kcondvar_t lg_port_cv; 276 int lg_port_ref; 277 } aggr_grp_t; 278 279 #define AGGR_GRP_REFHOLD(grp) { \ 280 atomic_inc_32(&(grp)->lg_refs); \ 281 ASSERT((grp)->lg_refs != 0); \ 282 } 283 284 #define AGGR_GRP_REFRELE(grp) { \ 285 ASSERT((grp)->lg_refs != 0); \ 286 membar_exit(); \ 287 if (atomic_dec_32_nv(&(grp)->lg_refs) == 0) \ 288 aggr_grp_free(grp); \ 289 } 290 291 #define AGGR_PORT_REFHOLD(port) { \ 292 atomic_inc_32(&(port)->lp_refs); \ 293 ASSERT((port)->lp_refs != 0); \ 294 } 295 296 #define AGGR_PORT_REFRELE(port) { \ 297 ASSERT((port)->lp_refs != 0); \ 298 membar_exit(); \ 299 if (atomic_dec_32_nv(&(port)->lp_refs) == 0) \ 300 aggr_port_free(port); \ 301 } 302 303 extern dev_info_t *aggr_dip; 304 extern int aggr_ioc_init(void); 305 extern void aggr_ioc_fini(void); 306 307 typedef int (*aggr_grp_info_new_grp_fn_t)(void *, datalink_id_t, uint32_t, 308 uchar_t *, boolean_t, boolean_t, uint32_t, uint32_t, aggr_lacp_mode_t, 309 aggr_lacp_timer_t); 310 typedef int (*aggr_grp_info_new_port_fn_t)(void *, datalink_id_t, uchar_t *, 311 aggr_port_state_t, aggr_lacp_state_t *); 312 313 extern void aggr_grp_init(void); 314 extern void aggr_grp_fini(void); 315 extern int aggr_grp_create(datalink_id_t, uint32_t, uint_t, laioc_port_t *, 316 uint32_t, boolean_t, boolean_t, uchar_t *, aggr_lacp_mode_t, 317 aggr_lacp_timer_t, cred_t *); 318 extern int aggr_grp_delete(datalink_id_t, cred_t *); 319 extern void aggr_grp_free(aggr_grp_t *); 320 321 extern int aggr_grp_info(datalink_id_t, void *, aggr_grp_info_new_grp_fn_t, 322 aggr_grp_info_new_port_fn_t, cred_t *); 323 extern void aggr_grp_notify(aggr_grp_t *, uint32_t); 324 extern boolean_t aggr_grp_attach_port(aggr_grp_t *, aggr_port_t *); 325 extern boolean_t aggr_grp_detach_port(aggr_grp_t *, aggr_port_t *); 326 extern void aggr_grp_port_mac_changed(aggr_grp_t *, aggr_port_t *, 327 boolean_t *, boolean_t *); 328 extern int aggr_grp_add_ports(datalink_id_t, uint_t, boolean_t, 329 laioc_port_t *); 330 extern int aggr_grp_rem_ports(datalink_id_t, uint_t, laioc_port_t *); 331 extern boolean_t aggr_grp_update_ports_mac(aggr_grp_t *); 332 extern int aggr_grp_modify(datalink_id_t, uint8_t, uint32_t, boolean_t, 333 const uchar_t *, aggr_lacp_mode_t, aggr_lacp_timer_t); 334 extern void aggr_grp_multicst_port(aggr_port_t *, boolean_t); 335 extern uint_t aggr_grp_count(void); 336 extern void aggr_grp_update_default(aggr_grp_t *); 337 338 extern void aggr_port_init(void); 339 extern void aggr_port_fini(void); 340 extern int aggr_port_create(aggr_grp_t *, const datalink_id_t, boolean_t, 341 aggr_port_t **); 342 extern void aggr_port_delete(aggr_port_t *); 343 extern void aggr_port_free(aggr_port_t *); 344 extern int aggr_port_start(aggr_port_t *); 345 extern void aggr_port_stop(aggr_port_t *); 346 extern int aggr_port_promisc(aggr_port_t *, boolean_t); 347 extern int aggr_port_unicst(aggr_port_t *); 348 extern int aggr_port_multicst(void *, boolean_t, const uint8_t *); 349 extern uint64_t aggr_port_stat(aggr_port_t *, uint_t); 350 extern boolean_t aggr_port_notify_link(aggr_grp_t *, aggr_port_t *); 351 extern void aggr_port_init_callbacks(aggr_port_t *); 352 353 extern void aggr_recv_cb(void *, mac_resource_handle_t, mblk_t *, boolean_t); 354 355 extern void aggr_tx_ring_update(void *, uintptr_t); 356 extern void aggr_tx_notify_thread(void *); 357 extern void aggr_send_port_enable(aggr_port_t *); 358 extern void aggr_send_port_disable(aggr_port_t *); 359 extern void aggr_send_update_policy(aggr_grp_t *, uint32_t); 360 361 extern void aggr_lacp_init(void); 362 extern void aggr_lacp_fini(void); 363 extern void aggr_lacp_init_port(aggr_port_t *); 364 extern void aggr_lacp_init_grp(aggr_grp_t *); 365 extern void aggr_lacp_set_mode(aggr_grp_t *, aggr_lacp_mode_t, 366 aggr_lacp_timer_t); 367 extern void aggr_lacp_update_mode(aggr_grp_t *, aggr_lacp_mode_t); 368 extern void aggr_lacp_update_timer(aggr_grp_t *, aggr_lacp_timer_t); 369 extern void aggr_lacp_rx_enqueue(aggr_port_t *, mblk_t *); 370 extern void aggr_lacp_port_attached(aggr_port_t *); 371 extern void aggr_lacp_port_detached(aggr_port_t *); 372 extern void aggr_port_lacp_set_mode(aggr_grp_t *, aggr_port_t *); 373 374 extern void aggr_lacp_rx_thread(void *); 375 extern void aggr_recv_lacp(aggr_port_t *, mac_resource_handle_t, mblk_t *); 376 377 extern void aggr_grp_port_hold(aggr_port_t *); 378 extern void aggr_grp_port_rele(aggr_port_t *); 379 extern void aggr_grp_port_wait(aggr_grp_t *); 380 381 extern int aggr_port_addmac(aggr_port_t *, uint_t, const uint8_t *); 382 extern void aggr_port_remmac(aggr_port_t *, uint_t, const uint8_t *); 383 384 extern int aggr_port_addvlan(aggr_port_t *, uint_t, uint16_t); 385 extern int aggr_port_remvlan(aggr_port_t *, uint_t, uint16_t); 386 387 extern mblk_t *aggr_ring_tx(void *, mblk_t *); 388 extern mblk_t *aggr_find_tx_ring(void *, mblk_t *, 389 uintptr_t, mac_ring_handle_t *); 390 391 #endif /* _KERNEL */ 392 393 #ifdef __cplusplus 394 } 395 #endif 396 397 #endif /* _SYS_AGGR_IMPL_H */ 398