xref: /illumos-gate/usr/src/uts/common/sys/aggr_impl.h (revision ae9405842e25ee75c6a9fd1996e04b41fbd2eda3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  * Copyright 2012 OmniTI Computer Consulting, Inc  All rights reserved.
25  * Copyright 2018 Joyent, Inc.
26  * Copyright 2020 RackTop Systems, Inc.
27  */
28 
29 #ifndef	_SYS_AGGR_IMPL_H
30 #define	_SYS_AGGR_IMPL_H
31 
32 #include <sys/types.h>
33 #include <sys/cred.h>
34 #include <sys/mac_ether.h>
35 #include <sys/mac_provider.h>
36 #include <sys/mac_client.h>
37 #include <sys/mac_client_priv.h>
38 #include <sys/aggr_lacp.h>
39 
40 #ifdef	__cplusplus
41 extern "C" {
42 #endif
43 
44 #ifdef _KERNEL
45 
#define	AGGR_MINOR_CTL	1		/* control interface minor */

/* flags for aggr_grp_modify(); distinct bits, may be OR-ed together */
#define	AGGR_MODIFY_POLICY		0x01
#define	AGGR_MODIFY_MAC			0x02
#define	AGGR_MODIFY_LACP_MODE		0x04
#define	AGGR_MODIFY_LACP_TIMER		0x08

/*
 * Possible value of aggr_pseudo_rx_ring_t.arr_flags. Set when the ring entry
 * in the pseudo RX group is used.
 */
#define	MAC_PSEUDO_RING_INUSE	0x01

/*
 * Dimension of the per-port HW group handle array (aggr_port_t.lp_hwghs)
 * and of the per-aggr pseudo Rx group array (aggr_grp_t.lg_rx_groups).
 */
#define	MAX_GROUPS_PER_PORT	128
61 
62 /*
63  * VLAN filters placed on the Rx pseudo group.
64  */
65 typedef struct aggr_vlan {
66 	list_node_t	av_link;
67 	uint16_t	av_vid;		/* VLAN ID */
68 	uint_t		av_refs;	/* num aggr clients using this VID */
69 } aggr_vlan_t;
70 
71 typedef struct aggr_unicst_addr_s {
72 	uint8_t				aua_addr[ETHERADDRL];
73 	struct aggr_unicst_addr_s	*aua_next;
74 } aggr_unicst_addr_t;
75 
76 typedef struct aggr_pseudo_rx_ring_s {
77 	mac_ring_handle_t		arr_rh;	/* set by aggr_fill_ring() */
78 	struct aggr_port_s		*arr_port;
79 	struct aggr_pseudo_rx_group_s	*arr_grp;
80 	mac_ring_handle_t		arr_hw_rh;
81 	uint_t				arr_flags;
82 	uint64_t			arr_gen;
83 } aggr_pseudo_rx_ring_t;
84 
85 /*
86  * An aggr pseudo group abstracts the underlying ports' HW groups. For
87  * example, if each port has 8 groups (mac_group_t), then the aggr
88  * will create 8 pseudo groups. Each pseudo group represents a
89  * collection of HW groups: one group from each port. If you have
90  * three ports then the pseudo group stands in for three HW groups.
91  */
92 typedef struct aggr_pseudo_rx_group_s {
93 	uint_t			arg_index;
94 	struct aggr_grp_s	*arg_grp; /* filled in by aggr_fill_group() */
95 	mac_group_handle_t	arg_gh;   /* filled in by aggr_fill_group() */
96 	aggr_unicst_addr_t	*arg_macaddr;
97 	aggr_pseudo_rx_ring_t	arg_rings[MAX_RINGS_PER_GROUP];
98 	uint_t			arg_ring_cnt;
99 	uint_t			arg_untagged; /* num clients untagged */
100 	list_t			arg_vlans;    /* VLANs on this group */
101 } aggr_pseudo_rx_group_t;
102 
103 typedef struct aggr_pseudo_tx_ring_s {
104 	mac_ring_handle_t	atr_rh;	/* filled in by aggr_fill_ring() */
105 	struct aggr_port_s	*atr_port;
106 	mac_ring_handle_t	atr_hw_rh;
107 	uint_t			atr_flags;
108 } aggr_pseudo_tx_ring_t;
109 
110 typedef struct aggr_pseudo_tx_group_s {
111 	mac_group_handle_t	atg_gh;	/* filled in by aggr_fill_group() */
112 	uint_t			atg_ring_cnt;
113 	aggr_pseudo_tx_ring_t	atg_rings[MAX_RINGS_PER_GROUP];
114 } aggr_pseudo_tx_group_t;
115 
116 /*
117  * A link aggregation MAC port.
118  * Note that lp_next is protected by the lg_lock of the group the
119  * port is part of.
120  */
121 typedef struct aggr_port_s {
122 	struct aggr_port_s *lp_next;
123 	struct aggr_grp_s *lp_grp;		/* back ptr to group */
124 	datalink_id_t	lp_linkid;
125 	uint16_t	lp_portid;
126 	uint8_t		lp_addr[ETHERADDRL];	/* port MAC address */
127 	uint32_t	lp_refs;		/* refcount */
128 	aggr_port_state_t lp_state;
129 	uint32_t	lp_started : 1,
130 			lp_tx_enabled : 1,
131 			lp_collector_enabled : 1,
132 			lp_promisc_on : 1,
133 			lp_no_link_update : 1,
134 			lp_tx_grp_added : 1,
135 			lp_closing : 1,
136 			lp_pad_bits : 25;
137 	mac_handle_t	lp_mh;
138 
139 	mac_client_handle_t	lp_mch;
140 
141 	const mac_info_t *lp_mip;
142 	mac_notify_handle_t lp_mnh;
143 	uint_t		lp_tx_idx;		/* idx in group's tx array */
144 	uint64_t	lp_ifspeed;
145 	link_state_t	lp_link_state;
146 	link_duplex_t	lp_link_duplex;
147 	uint64_t	lp_stat[MAC_NSTAT];
148 	uint64_t	lp_ether_stat[ETHER_NSTAT];
149 	aggr_lacp_port_t lp_lacp;		/* LACP state */
150 	lacp_stats_t	lp_lacp_stats;
151 	uint32_t	lp_margin;
152 
153 	mac_unicast_handle_t lp_mah;
154 
155 	/* List of non-primary addresses that requires promiscous mode set */
156 	aggr_unicst_addr_t	*lp_prom_addr;
157 
158 	/*
159 	 * References to the underlying HW Rx groups of this port.
160 	 * Used by aggr to program HW classification for the pseudo
161 	 * groups.
162 	 */
163 	mac_group_handle_t	lp_hwghs[MAX_GROUPS_PER_PORT];
164 
165 	uint_t			lp_tx_ring_alloc;
166 	uint_t			lp_tx_ring_cnt;
167 	/* handles of the underlying HW TX rings */
168 	mac_ring_handle_t	*lp_tx_rings;
169 	/*
170 	 * Handles of the pseudo TX rings. Each of them maps to
171 	 * corresponding hardware TX ring in lp_tx_rings[]. A
172 	 * pseudo TX ring is presented to aggr primary mac
173 	 * client even when underlying NIC has no TX ring.
174 	 */
175 	mac_ring_handle_t	*lp_pseudo_tx_rings;
176 	void			*lp_tx_notify_mh;
177 } aggr_port_t;
178 
179 /*
180  * A link aggregation group.
181  *
182  * The following per-group flags are defined:
183  *
184  * - lg_addr_fixed: set when the MAC address has been explicitely set
185  *   when the group was created, or by a m_unicst_set() request.
186  *   If this flag is not set, the MAC address of the group will be
187  *   set to the first port that is added to the group.
188  *
189  * - lg_add_set: used only when lg_addr_fixed is not set. Captures whether
190  *   the MAC address was initialized according to the members of the group.
191  *   When set, the lg_port field points to the port from which the
192  *   MAC address was initialized.
193  *
194  */
195 typedef struct aggr_grp_s {
196 	datalink_id_t	lg_linkid;
197 	uint16_t	lg_key;			/* key (group port number) */
198 	uint32_t	lg_refs;		/* refcount */
199 	uint16_t	lg_nports;		/* number of MAC ports */
200 	uint16_t	lg_nports_high;		/* highest no. of MAC ports */
201 	uint8_t		lg_addr[ETHERADDRL];	/* group MAC address */
202 	uint16_t
203 			lg_closing : 1,
204 			lg_addr_fixed : 1,	/* fixed MAC address? */
205 			lg_started : 1,		/* group started? */
206 			lg_promisc : 1,		/* in promiscuous mode? */
207 			lg_zcopy : 1,
208 			lg_vlan : 1,
209 			lg_force : 1,
210 			lg_lso : 1,
211 			lg_pad_bits : 8;
212 	aggr_port_t	*lg_ports;		/* list of configured ports */
213 	aggr_port_t	*lg_mac_addr_port;	/* using address of this port */
214 	mac_handle_t	lg_mh;
215 	zoneid_t	lg_zoneid;
216 	uint_t		lg_nattached_ports;
217 	krwlock_t	lg_tx_lock;
218 	uint_t		lg_ntx_ports;
219 	aggr_port_t	**lg_tx_ports;		/* array of tx ports */
220 	uint_t		lg_tx_ports_size;	/* size of lg_tx_ports */
221 	uint32_t	lg_tx_policy;		/* outbound policy */
222 	uint8_t		lg_mac_tx_policy;
223 	link_state_t	lg_link_state;
224 
225 
226 	/*
227 	 * The lg_stat_lock must be held when accessing these fields.
228 	 */
229 	kmutex_t	lg_stat_lock;
230 	uint64_t	lg_ifspeed;
231 	link_duplex_t	lg_link_duplex;
232 	uint64_t	lg_stat[MAC_NSTAT];
233 	uint64_t	lg_ether_stat[ETHER_NSTAT];
234 
235 	aggr_lacp_mode_t lg_lacp_mode;		/* off, active, or passive */
236 	Agg_t		aggr;			/* 802.3ad data */
237 	uint32_t	lg_hcksum_txflags;
238 	uint_t		lg_max_sdu;
239 	uint32_t	lg_margin;
240 	mac_capab_lso_t lg_cap_lso;
241 
242 	/*
243 	 * The following fields are used by the LACP packets processing.
244 	 * Specifically, as the LACP packets processing is not performance
245 	 * critical, all LACP packets will be handled by a dedicated thread
246 	 * instead of in the mac_rx() call. This is to avoid the dead lock
247 	 * with mac_unicast_remove(), which holding the mac perimeter of the
248 	 * aggr, and wait for the mr_refcnt of the RX ring to drop to zero.
249 	 */
250 	kmutex_t	lg_lacp_lock;
251 	kcondvar_t	lg_lacp_cv;
252 	mblk_t		*lg_lacp_head;
253 	mblk_t		*lg_lacp_tail;
254 	kthread_t	*lg_lacp_rx_thread;
255 	boolean_t	lg_lacp_done;
256 
257 	uint_t			lg_rx_group_count;
258 	aggr_pseudo_rx_group_t	lg_rx_groups[MAX_GROUPS_PER_PORT];
259 
260 	aggr_pseudo_tx_group_t	lg_tx_group;
261 
262 	kmutex_t	lg_tx_flowctl_lock;
263 	kcondvar_t	lg_tx_flowctl_cv;
264 	uint_t		lg_tx_blocked_cnt;
265 	mac_ring_handle_t	*lg_tx_blocked_rings;
266 	kthread_t	*lg_tx_notify_thread;
267 	boolean_t	lg_tx_notify_done;
268 
269 	/*
270 	 * The following fields are used by aggr to wait for all the
271 	 * aggr_port_notify_cb() and aggr_port_timer_thread() to finish
272 	 * before it calls mac_unregister() when the aggr is deleted.
273 	 */
274 	kmutex_t	lg_port_lock;
275 	kcondvar_t	lg_port_cv;
276 	int		lg_port_ref;
277 } aggr_grp_t;
278 
/*
 * Take a reference on an aggr group: atomically increment lg_refs and
 * assert that the counter did not wrap around to zero.
 *
 * Wrapped in do { } while (0) so that the macro expands to exactly one
 * statement and stays safe in an unbraced if/else.
 */
#define	AGGR_GRP_REFHOLD(grp) do {			\
	atomic_inc_32(&(grp)->lg_refs);			\
	ASSERT((grp)->lg_refs != 0);			\
} while (0)

/*
 * Drop a reference on an aggr group. membar_exit() is issued before the
 * atomic decrement; when the decrement brings lg_refs to zero the group
 * is handed to aggr_grp_free().
 */
#define	AGGR_GRP_REFRELE(grp) do {				\
	ASSERT((grp)->lg_refs != 0);				\
	membar_exit();						\
	if (atomic_dec_32_nv(&(grp)->lg_refs) == 0)		\
		aggr_grp_free(grp);				\
} while (0)
290 
/*
 * Take a reference on an aggr port: atomically increment lp_refs and
 * assert that the counter did not wrap around to zero.
 *
 * Wrapped in do { } while (0) so that the macro expands to exactly one
 * statement and stays safe in an unbraced if/else.
 */
#define	AGGR_PORT_REFHOLD(port) do {			\
	atomic_inc_32(&(port)->lp_refs);		\
	ASSERT((port)->lp_refs != 0);			\
} while (0)

/*
 * Drop a reference on an aggr port. membar_exit() is issued before the
 * atomic decrement; when the decrement brings lp_refs to zero the port
 * is handed to aggr_port_free().
 */
#define	AGGR_PORT_REFRELE(port) do {				\
	ASSERT((port)->lp_refs != 0);				\
	membar_exit();						\
	if (atomic_dec_32_nv(&(port)->lp_refs) == 0)		\
		aggr_port_free(port);				\
} while (0)
302 
303 extern dev_info_t *aggr_dip;
304 extern int aggr_ioc_init(void);
305 extern void aggr_ioc_fini(void);
306 
307 typedef int (*aggr_grp_info_new_grp_fn_t)(void *, datalink_id_t, uint32_t,
308     uchar_t *, boolean_t, boolean_t, uint32_t, uint32_t, aggr_lacp_mode_t,
309     aggr_lacp_timer_t);
310 typedef int (*aggr_grp_info_new_port_fn_t)(void *, datalink_id_t, uchar_t *,
311     aggr_port_state_t, aggr_lacp_state_t *);
312 
313 extern void aggr_grp_init(void);
314 extern void aggr_grp_fini(void);
315 extern int aggr_grp_create(datalink_id_t, uint32_t, uint_t, laioc_port_t *,
316     uint32_t, boolean_t, boolean_t, uchar_t *, aggr_lacp_mode_t,
317     aggr_lacp_timer_t, cred_t *);
318 extern int aggr_grp_delete(datalink_id_t, cred_t *);
319 extern void aggr_grp_free(aggr_grp_t *);
320 
321 extern int aggr_grp_info(datalink_id_t, void *, aggr_grp_info_new_grp_fn_t,
322     aggr_grp_info_new_port_fn_t, cred_t *);
323 extern void aggr_grp_notify(aggr_grp_t *, uint32_t);
324 extern boolean_t aggr_grp_attach_port(aggr_grp_t *, aggr_port_t *);
325 extern boolean_t aggr_grp_detach_port(aggr_grp_t *, aggr_port_t *);
326 extern void aggr_grp_port_mac_changed(aggr_grp_t *, aggr_port_t *,
327     boolean_t *, boolean_t *);
328 extern int aggr_grp_add_ports(datalink_id_t, uint_t, boolean_t,
329     laioc_port_t *);
330 extern int aggr_grp_rem_ports(datalink_id_t, uint_t, laioc_port_t *);
331 extern boolean_t aggr_grp_update_ports_mac(aggr_grp_t *);
332 extern int aggr_grp_modify(datalink_id_t, uint8_t, uint32_t, boolean_t,
333     const uchar_t *, aggr_lacp_mode_t, aggr_lacp_timer_t);
334 extern void aggr_grp_multicst_port(aggr_port_t *, boolean_t);
335 extern uint_t aggr_grp_count(void);
336 extern void aggr_grp_update_default(aggr_grp_t *);
337 
338 extern void aggr_port_init(void);
339 extern void aggr_port_fini(void);
340 extern int aggr_port_create(aggr_grp_t *, const datalink_id_t, boolean_t,
341     aggr_port_t **);
342 extern void aggr_port_delete(aggr_port_t *);
343 extern void aggr_port_free(aggr_port_t *);
344 extern int aggr_port_start(aggr_port_t *);
345 extern void aggr_port_stop(aggr_port_t *);
346 extern int aggr_port_promisc(aggr_port_t *, boolean_t);
347 extern int aggr_port_unicst(aggr_port_t *);
348 extern int aggr_port_multicst(void *, boolean_t, const uint8_t *);
349 extern uint64_t aggr_port_stat(aggr_port_t *, uint_t);
350 extern boolean_t aggr_port_notify_link(aggr_grp_t *, aggr_port_t *);
351 extern void aggr_port_init_callbacks(aggr_port_t *);
352 
353 extern void aggr_recv_cb(void *, mac_resource_handle_t, mblk_t *, boolean_t);
354 
355 extern void aggr_tx_ring_update(void *, uintptr_t);
356 extern void aggr_tx_notify_thread(void *);
357 extern void aggr_send_port_enable(aggr_port_t *);
358 extern void aggr_send_port_disable(aggr_port_t *);
359 extern void aggr_send_update_policy(aggr_grp_t *, uint32_t);
360 
361 extern void aggr_lacp_init(void);
362 extern void aggr_lacp_fini(void);
363 extern void aggr_lacp_init_port(aggr_port_t *);
364 extern void aggr_lacp_init_grp(aggr_grp_t *);
365 extern void aggr_lacp_set_mode(aggr_grp_t *, aggr_lacp_mode_t,
366     aggr_lacp_timer_t);
367 extern void aggr_lacp_update_mode(aggr_grp_t *, aggr_lacp_mode_t);
368 extern void aggr_lacp_update_timer(aggr_grp_t *, aggr_lacp_timer_t);
369 extern void aggr_lacp_rx_enqueue(aggr_port_t *, mblk_t *);
370 extern void aggr_lacp_port_attached(aggr_port_t *);
371 extern void aggr_lacp_port_detached(aggr_port_t *);
372 extern void aggr_port_lacp_set_mode(aggr_grp_t *, aggr_port_t *);
373 
374 extern void aggr_lacp_rx_thread(void *);
375 extern void aggr_recv_lacp(aggr_port_t *, mac_resource_handle_t, mblk_t *);
376 
377 extern void aggr_grp_port_hold(aggr_port_t *);
378 extern void aggr_grp_port_rele(aggr_port_t *);
379 extern void aggr_grp_port_wait(aggr_grp_t *);
380 
381 extern int aggr_port_addmac(aggr_port_t *, uint_t, const uint8_t *);
382 extern void aggr_port_remmac(aggr_port_t *, uint_t, const uint8_t *);
383 
384 extern int aggr_port_addvlan(aggr_port_t *, uint_t, uint16_t);
385 extern int aggr_port_remvlan(aggr_port_t *, uint_t, uint16_t);
386 
387 extern mblk_t *aggr_ring_tx(void *, mblk_t *);
388 extern mblk_t *aggr_find_tx_ring(void *, mblk_t *,
389     uintptr_t, mac_ring_handle_t *);
390 
391 #endif	/* _KERNEL */
392 
393 #ifdef	__cplusplus
394 }
395 #endif
396 
397 #endif	/* _SYS_AGGR_IMPL_H */
398