/*
 * xref: /titanic_50/usr/src/uts/common/sys/mac_flow_impl.h
 * (revision 0dc2366f7b9f9f36e10909b1e95edbf2a261c2ac)
 */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
21da14cebeSEric Cheng 
/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
26da14cebeSEric Cheng 
27da14cebeSEric Cheng #ifndef	_MAC_FLOW_IMPL_H
28da14cebeSEric Cheng #define	_MAC_FLOW_IMPL_H
29da14cebeSEric Cheng 
30da14cebeSEric Cheng #ifdef	__cplusplus
31da14cebeSEric Cheng extern "C" {
32da14cebeSEric Cheng #endif
33da14cebeSEric Cheng 
34da14cebeSEric Cheng #include <sys/param.h>
35da14cebeSEric Cheng #include <sys/atomic.h>
36da14cebeSEric Cheng #include <sys/ksynch.h>
37da14cebeSEric Cheng #include <sys/mac_flow.h>
38da14cebeSEric Cheng #include <sys/stream.h>
39da14cebeSEric Cheng #include <sys/sdt.h>
40da14cebeSEric Cheng #include <net/if.h>
41da14cebeSEric Cheng 
/*
 * Macros to increment/decrement the reference count on a flow_entry_t.
 *
 * FLOW_REFHOLD unconditionally takes a reference: it fires the
 * flow_refhold DTrace probe and increments fe_refcnt under fe_lock.
 *
 * The macro expands to a brace-enclosed block rather than a single
 * statement, so "if (c) FLOW_REFHOLD(f); else ..." does not compile;
 * wrap such uses in braces.
 */
#define	FLOW_REFHOLD(flent) {					\
	DTRACE_PROBE1(flow_refhold, flow_entry_t *, (flent));	\
	mutex_enter(&(flent)->fe_lock);				\
	(flent)->fe_refcnt++;					\
	mutex_exit(&(flent)->fe_lock);				\
}
51da14cebeSEric Cheng 
/*
 * Data paths must not attempt to use a flow entry if it is marked INCIPIENT
 * or QUIESCE. In the former case the set up is not yet complete and the
 * data path could stumble on inconsistent data structures. In the latter
 * case a control operation is waiting for quiescence so that it can
 * change callbacks or other structures without the use of locks.
 *
 * The hold is likewise refused when the flent is marked FE_CONDEMNED,
 * FE_UF_NO_DATAPATH or FE_MC_NO_DATAPATH. On refusal err is set to -1 and
 * fe_refcnt is left untouched; on success err is set to 0 and a reference
 * is taken. The flow_refhold probe fires in both cases.
 */
#define	FLOW_TRY_REFHOLD(flent, err) {				\
	DTRACE_PROBE1(flow_refhold, flow_entry_t *, (flent));	\
	(err) = 0;						\
	mutex_enter(&(flent)->fe_lock);				\
	if ((flent)->fe_flags & (FE_INCIPIENT | FE_QUIESCE | FE_CONDEMNED | \
	    FE_UF_NO_DATAPATH | FE_MC_NO_DATAPATH))			\
		(err) = -1;					\
	else							\
		(flent)->fe_refcnt++;				\
	mutex_exit(&(flent)->fe_lock);				\
}
70da14cebeSEric Cheng 
/*
 * Drop a reference taken with FLOW_REFHOLD/FLOW_TRY_REFHOLD.
 *
 * If FE_WAITER is set, the waiter on fe_cv is signalled; the count is
 * asserted to still be non-zero in that case. Otherwise, when the count
 * reaches zero, mac_flow_destroy() is called with fe_lock still held (this
 * macro does not release it on that path).
 */
#define	FLOW_REFRELE(flent) {					\
	DTRACE_PROBE1(flow_refrele, flow_entry_t *, (flent));	\
	mutex_enter(&(flent)->fe_lock);				\
	ASSERT((flent)->fe_refcnt != 0);			\
	(flent)->fe_refcnt--;					\
	if ((flent)->fe_flags & FE_WAITER) {			\
		ASSERT((flent)->fe_refcnt != 0);		\
		cv_signal(&(flent)->fe_cv);			\
		mutex_exit(&(flent)->fe_lock);			\
	} else if ((flent)->fe_refcnt == 0) {			\
		mac_flow_destroy(flent);			\
	} else {						\
		mutex_exit(&(flent)->fe_lock);			\
	}							\
}
86da14cebeSEric Cheng 
/* Take a user reference: increment fe_user_refcnt under fe_lock. */
#define	FLOW_USER_REFHOLD(flent) {			\
	mutex_enter(&(flent)->fe_lock);			\
	(flent)->fe_user_refcnt++;			\
	mutex_exit(&(flent)->fe_lock);			\
}
92da14cebeSEric Cheng 
/*
 * Drop a user reference. The waiter on fe_cv is signalled only when the
 * last user reference goes away and FE_WAITER is set.
 */
#define	FLOW_USER_REFRELE(flent) {			\
	mutex_enter(&(flent)->fe_lock);			\
	ASSERT((flent)->fe_user_refcnt != 0);		\
	if (--(flent)->fe_user_refcnt == 0 &&		\
	    ((flent)->fe_flags & FE_WAITER))		\
		cv_signal(&(flent)->fe_cv);		\
	mutex_exit(&(flent)->fe_lock);			\
}
101da14cebeSEric Cheng 
/*
 * Release what must be the last reference: asserts that exactly one
 * fe_refcnt reference and no user references remain, then drops it via
 * FLOW_REFRELE. The argument is parenthesized so that expressions such as
 * "&fe" can be passed safely.
 */
#define	FLOW_FINAL_REFRELE(flent) {			\
	ASSERT((flent)->fe_refcnt == 1 &&		\
	    (flent)->fe_user_refcnt == 0);		\
	FLOW_REFRELE(flent);				\
}
106da14cebeSEric Cheng 
/*
 * Mark the flent with a bit flag: ORs flag into fe_flags under fe_lock.
 */
#define	FLOW_MARK(flent, flag) {		\
	mutex_enter(&(flent)->fe_lock);		\
	(flent)->fe_flags |= (flag);		\
	mutex_exit(&(flent)->fe_lock);		\
}
115da14cebeSEric Cheng 
/*
 * Unmark the flent: clears every bit of flag from fe_flags under fe_lock.
 * flag is parenthesized before complementing so that a compound argument
 * such as (A | B) clears both bits instead of expanding to (~A | B).
 */
#define	FLOW_UNMARK(flent, flag) {		\
	mutex_enter(&(flent)->fe_lock);		\
	(flent)->fe_flags &= ~(flag);		\
	mutex_exit(&(flent)->fe_lock);		\
}
121da14cebeSEric Cheng 
/*
 * Map a flent to its mac implementation: through the broadcast group when
 * fe_mbg is set, otherwise through the mac client stored in fe_mcip.
 */
#define	FLENT_TO_MIP(flent)						\
	((flent)->fe_mbg != NULL ? mac_bcast_grp_mip((flent)->fe_mbg) :	\
	((mac_client_impl_t *)(flent)->fe_mcip)->mci_mip)
125da14cebeSEric Cheng 
/*
 * Convert a bandwidth expressed in bps to a number of bytes per tick.
 * Integer arithmetic: the result truncates, and is 0 when bps / 8 < hz.
 */
#define	FLOW_BYTES_PER_TICK(bps)	(((bps) >> 3) / hz)
128da14cebeSEric Cheng 
/*
 * Given an underlying range and a priority level, obtain the minimum for the
 * new range: min plus pri slices, each slice being the range width divided
 * by MRP_PRIORITY_LEVELS (integer division).
 */
#define	FLOW_MIN_PRIORITY(min, max, pri)	\
	((min) + ((((max) - (min)) / MRP_PRIORITY_LEVELS) * (pri)))
135da14cebeSEric Cheng 
/*
 * Given an underlying range and a minimum level (base), obtain the maximum
 * for the new range: base plus one slice of the underlying range, a slice
 * being the range width divided by MRP_PRIORITY_LEVELS (integer division).
 */
#define	FLOW_MAX_PRIORITY(min, max, base)	\
	((base) + (((max) - (min)) / MRP_PRIORITY_LEVELS))
142da14cebeSEric Cheng 
/*
 * Given an underlying range and a priority level, get the absolute
 * priority value. For now there are just 3 values, high, low and
 * medium, so we can just return max, min or min + (max - min) / 2.
 * If there are more than three we need to change this computation.
 *
 * The whole expansion is parenthesized so that the macro can be used as
 * an operand of a larger expression.
 */
#define	FLOW_PRIORITY(min, max, pri)		\
	((pri) == MPL_HIGH ? (max) :		\
	(pri) == MPL_LOW ? (min) :		\
	((min) + (((max) - (min)) / 2)))
153da14cebeSEric Cheng 
#define	MAC_FLOW_TAB_SIZE		500

/* Forward declarations for the types defined or referenced below. */
typedef struct flow_entry_s		flow_entry_t;
typedef struct flow_tab_s		flow_tab_t;
typedef struct flow_state_s		flow_state_t;
struct mac_impl_s;
struct mac_client_impl_s;
161da14cebeSEric Cheng 
/*
 * Classification flags used to lookup the flow.
 */
#define	FLOW_INBOUND		0x01
#define	FLOW_OUTBOUND		0x02
/* Don't compare VID when classifying the packets, see mac_rx_classify() */
#define	FLOW_IGNORE_VLAN	0x04
169da14cebeSEric Cheng 
170da14cebeSEric Cheng /* Generic flow client function signature */
171da14cebeSEric Cheng typedef void		(*flow_fn_t)(void *, void *, mblk_t *, boolean_t);
172da14cebeSEric Cheng 
/* Flow state; passed to mac_flow_wait() to select what is waited on. */
typedef enum {
	FLOW_DRIVER_UPCALL,
	FLOW_USER_REF
} mac_flow_state_t;
178da14cebeSEric Cheng 
179da14cebeSEric Cheng /* Matches a flow_entry_t using the extracted flow_state_t info */
180da14cebeSEric Cheng typedef boolean_t	(*flow_match_fn_t)(flow_tab_t *, flow_entry_t *,
181da14cebeSEric Cheng 			    flow_state_t *);
182da14cebeSEric Cheng 
/* fe_flags */
#define	FE_QUIESCE		0x01	/* Quiesce the flow */
#define	FE_WAITER		0x02	/* Flow has a waiter */
#define	FE_FLOW_TAB		0x04	/* Flow is in the flow tab list */
#define	FE_G_FLOW_HASH		0x08	/* Flow is in the global flow hash */
#define	FE_INCIPIENT		0x10	/* Being setup */
#define	FE_CONDEMNED		0x20	/* Being deleted */
#define	FE_UF_NO_DATAPATH	0x40	/* No datapath setup for User flow */
#define	FE_MC_NO_DATAPATH	0x80	/* No datapath setup for mac client */
192da14cebeSEric Cheng 
/* fe_type */
#define	FLOW_PRIMARY_MAC	0x01	/* NIC primary MAC address */
#define	FLOW_VNIC_MAC		0x02	/* VNIC flow */
#define	FLOW_MCAST		0x04	/* Multicast (and broadcast) */
#define	FLOW_OTHER		0x08	/* Other flows configured */
#define	FLOW_USER		0x10	/* User defined flow */
#define	FLOW_VNIC		FLOW_VNIC_MAC	/* Alias of FLOW_VNIC_MAC */
#define	FLOW_NO_STATS		0x20	/* Don't create stats for the flow */
201da14cebeSEric Cheng 
202da14cebeSEric Cheng /*
203da14cebeSEric Cheng  * Shared Bandwidth control counters between the soft ring set and its
204da14cebeSEric Cheng  * associated soft rings. In case the flow associated with NIC/VNIC
205da14cebeSEric Cheng  * has a group of Rx rings assigned to it, we have the same
206da14cebeSEric Cheng  * number of soft ring sets as we have the Rx ring in the group
207da14cebeSEric Cheng  * and each individual SRS (and its soft rings) decide when to
208da14cebeSEric Cheng  * poll their Rx ring independently. But if there is a B/W limit
209da14cebeSEric Cheng  * associated with the NIC/VNIC, then the B/W control counter is
210da14cebeSEric Cheng  * shared across all the SRS in the group and their associated
211da14cebeSEric Cheng  * soft rings.
212da14cebeSEric Cheng  *
213da14cebeSEric Cheng  * There is a many to 1 mapping between the SRS and
214da14cebeSEric Cheng  * mac_bw_ctl if the flow has a group of Rx rings associated with
215da14cebeSEric Cheng  * it.
216da14cebeSEric Cheng  */
217da14cebeSEric Cheng typedef struct mac_bw_ctl_s {
218da14cebeSEric Cheng 	kmutex_t	mac_bw_lock;
219da14cebeSEric Cheng 	uint32_t	mac_bw_state;
220da14cebeSEric Cheng 	size_t		mac_bw_sz;	/* ?? Is it needed */
221da14cebeSEric Cheng 	size_t		mac_bw_limit;	/* Max bytes to process per tick */
222da14cebeSEric Cheng 	size_t		mac_bw_used;	/* Bytes processed in current tick */
223da14cebeSEric Cheng 	size_t		mac_bw_drop_threshold; /* Max queue length */
224da14cebeSEric Cheng 	size_t		mac_bw_drop_bytes;
225da14cebeSEric Cheng 	size_t		mac_bw_polled;
226da14cebeSEric Cheng 	size_t		mac_bw_intr;
227da14cebeSEric Cheng 	clock_t		mac_bw_curr_time;
228da14cebeSEric Cheng } mac_bw_ctl_t;
229da14cebeSEric Cheng 
230da14cebeSEric Cheng struct flow_entry_s {					/* Protected by */
231da14cebeSEric Cheng 	struct flow_entry_s	*fe_next;		/* ft_lock */
232da14cebeSEric Cheng 
233da14cebeSEric Cheng 	datalink_id_t		fe_link_id;		/* WO */
234da14cebeSEric Cheng 
235da14cebeSEric Cheng 	/* Properties as specified for this flow */
236da14cebeSEric Cheng 	mac_resource_props_t	fe_resource_props;	/* SL */
237da14cebeSEric Cheng 
238da14cebeSEric Cheng 	/* Properties actually effective at run time for this flow */
239da14cebeSEric Cheng 	mac_resource_props_t	fe_effective_props;	/* SL */
240da14cebeSEric Cheng 
241da14cebeSEric Cheng 	kmutex_t		fe_lock;
242da000602SGirish Moodalbail 	char			fe_flow_name[MAXFLOWNAMELEN];	/* fe_lock */
243da14cebeSEric Cheng 	flow_desc_t		fe_flow_desc;		/* fe_lock */
244da14cebeSEric Cheng 	kcondvar_t		fe_cv;			/* fe_lock */
245da14cebeSEric Cheng 	/*
246da14cebeSEric Cheng 	 * Initial flow ref is 1 on creation. A thread that lookups the
247da14cebeSEric Cheng 	 * flent typically by a mac_flow_lookup() dynamically holds a ref.
248da14cebeSEric Cheng 	 * If the ref is 1, it means there arent' any upcalls from the driver
249da14cebeSEric Cheng 	 * or downcalls from the stack using this flent. Structures pointing
250da14cebeSEric Cheng 	 * to the flent or flent inserted in lists don't count towards this
251da14cebeSEric Cheng 	 * refcnt. Instead they are tracked using fe_flags. Only a control
252da14cebeSEric Cheng 	 * thread doing a teardown operation deletes the flent, after waiting
253da14cebeSEric Cheng 	 * for upcalls to finish synchronously. The fe_refcnt tracks
254da14cebeSEric Cheng 	 * the number of upcall refs
255da14cebeSEric Cheng 	 */
256da14cebeSEric Cheng 	uint32_t		fe_refcnt;		/* fe_lock */
257da14cebeSEric Cheng 
258da14cebeSEric Cheng 	/*
259da14cebeSEric Cheng 	 * This tracks lookups done using the global hash list for user
260da14cebeSEric Cheng 	 * generated flows. This refcnt only protects the flent itself
261da14cebeSEric Cheng 	 * from disappearing and helps walkers to read the flent info such
262da14cebeSEric Cheng 	 * as flow spec. However the flent may be quiesced and the SRS could
263da14cebeSEric Cheng 	 * be deleted. The fe_user_refcnt tracks the number of global flow
264da14cebeSEric Cheng 	 * has refs.
265da14cebeSEric Cheng 	 */
266da14cebeSEric Cheng 	uint32_t		fe_user_refcnt;		/* fe_lock */
267da14cebeSEric Cheng 	uint_t			fe_flags;		/* fe_lock */
268da14cebeSEric Cheng 
269da14cebeSEric Cheng 	/*
270da14cebeSEric Cheng 	 * Function/args to invoke for delivering matching packets
271da14cebeSEric Cheng 	 * Only the function ff_fn may be changed dynamically and atomically.
272da14cebeSEric Cheng 	 * The ff_arg1 and ff_arg2 are set at creation time and may not
273da14cebeSEric Cheng 	 * be changed.
274da14cebeSEric Cheng 	 */
275da14cebeSEric Cheng 	flow_fn_t		fe_cb_fn;		/* fe_lock */
276da14cebeSEric Cheng 	void 			*fe_cb_arg1;		/* fe_lock */
277da14cebeSEric Cheng 	void			*fe_cb_arg2;		/* fe_lock */
278da14cebeSEric Cheng 
279da14cebeSEric Cheng 	void			*fe_client_cookie;	/* WO */
280da14cebeSEric Cheng 	void			*fe_rx_ring_group;	/* SL */
281da14cebeSEric Cheng 	void			*fe_rx_srs[MAX_RINGS_PER_GROUP]; /* fe_lock */
282da14cebeSEric Cheng 	int			fe_rx_srs_cnt;		/* fe_lock */
283*0dc2366fSVenugopal Iyer 	void			*fe_tx_ring_group;
284da14cebeSEric Cheng 	void			*fe_tx_srs;		/* WO */
285*0dc2366fSVenugopal Iyer 	int			fe_tx_ring_cnt;
286da14cebeSEric Cheng 
287da14cebeSEric Cheng 	/*
288da14cebeSEric Cheng 	 * This is a unicast flow, and is a mac_client_impl_t
289da14cebeSEric Cheng 	 */
290da14cebeSEric Cheng 	void			*fe_mcip; 		/* WO */
291da14cebeSEric Cheng 
292da14cebeSEric Cheng 	/*
293da14cebeSEric Cheng 	 * Used by mci_flent_list of mac_client_impl_t to track flows sharing
294da14cebeSEric Cheng 	 * the same mac_client_impl_t.
295da14cebeSEric Cheng 	 */
296da14cebeSEric Cheng 	struct flow_entry_s	*fe_client_next;
297da14cebeSEric Cheng 
298da14cebeSEric Cheng 	/*
299da14cebeSEric Cheng 	 * This is a broadcast or multicast flow and is a mac_bcast_grp_t
300da14cebeSEric Cheng 	 */
301da14cebeSEric Cheng 	void			*fe_mbg;		/* WO */
302da14cebeSEric Cheng 	uint_t			fe_type;		/* WO */
303da14cebeSEric Cheng 
304da14cebeSEric Cheng 	/*
305da14cebeSEric Cheng 	 * BW control info.
306da14cebeSEric Cheng 	 */
307da14cebeSEric Cheng 	mac_bw_ctl_t		fe_tx_bw;
308da14cebeSEric Cheng 	mac_bw_ctl_t		fe_rx_bw;
309da14cebeSEric Cheng 
310da14cebeSEric Cheng 	/*
311da14cebeSEric Cheng 	 * Used by flow table lookup code
312da14cebeSEric Cheng 	 */
313da14cebeSEric Cheng 	flow_match_fn_t		fe_match;
314da14cebeSEric Cheng 
315da14cebeSEric Cheng 	/*
316da14cebeSEric Cheng 	 * Used by mac_flow_remove().
317da14cebeSEric Cheng 	 */
318da14cebeSEric Cheng 	int			fe_index;
319da14cebeSEric Cheng 	flow_tab_t		*fe_flow_tab;
320da14cebeSEric Cheng 
321da14cebeSEric Cheng 	kstat_t			*fe_ksp;
322*0dc2366fSVenugopal Iyer 	kstat_t			*fe_misc_stat_ksp;
323*0dc2366fSVenugopal Iyer 
324da14cebeSEric Cheng 	boolean_t		fe_desc_logged;
325da14cebeSEric Cheng 	uint64_t		fe_nic_speed;
326da14cebeSEric Cheng };
327da14cebeSEric Cheng 
328da14cebeSEric Cheng /*
329da14cebeSEric Cheng  * Various structures used by the flows framework for keeping track
330da14cebeSEric Cheng  * of packet state information.
331da14cebeSEric Cheng  */
332da14cebeSEric Cheng 
333da14cebeSEric Cheng /* Layer 2 */
334da14cebeSEric Cheng typedef struct flow_l2info_s {
335da14cebeSEric Cheng 	uchar_t		*l2_start;
336da14cebeSEric Cheng 	uint8_t		*l2_daddr;
337da14cebeSEric Cheng 	uint16_t	l2_vid;
338da14cebeSEric Cheng 	uint32_t	l2_sap;
339da14cebeSEric Cheng 	uint_t		l2_hdrsize;
340da14cebeSEric Cheng } flow_l2info_t;
341da14cebeSEric Cheng 
342da14cebeSEric Cheng /* Layer 3 */
343da14cebeSEric Cheng typedef struct flow_l3info_s {
344da14cebeSEric Cheng 	uchar_t		*l3_start;
345da14cebeSEric Cheng 	uint8_t		l3_protocol;
346da14cebeSEric Cheng 	uint8_t		l3_version;
347da14cebeSEric Cheng 	boolean_t	l3_dst_or_src;
348da14cebeSEric Cheng 	uint_t		l3_hdrsize;
349da14cebeSEric Cheng 	boolean_t	l3_fragmented;
350da14cebeSEric Cheng } flow_l3info_t;
351da14cebeSEric Cheng 
352da14cebeSEric Cheng /* Layer 4 */
353da14cebeSEric Cheng typedef struct flow_l4info_s {
354da14cebeSEric Cheng 	uchar_t		*l4_start;
355da14cebeSEric Cheng 	uint16_t	l4_src_port;
356da14cebeSEric Cheng 	uint16_t	l4_dst_port;
357da14cebeSEric Cheng 	uint16_t	l4_hash_port;
358da14cebeSEric Cheng } flow_l4info_t;
359da14cebeSEric Cheng 
360da14cebeSEric Cheng /*
361da14cebeSEric Cheng  * Combined state structure.
362da14cebeSEric Cheng  * Holds flow direction and an mblk_t pointer.
363da14cebeSEric Cheng  */
364da14cebeSEric Cheng struct flow_state_s {
365da14cebeSEric Cheng 	uint_t		fs_flags;
366da14cebeSEric Cheng 	mblk_t		*fs_mp;
367da14cebeSEric Cheng 	flow_l2info_t	fs_l2info;
368da14cebeSEric Cheng 	flow_l3info_t	fs_l3info;
369da14cebeSEric Cheng 	flow_l4info_t	fs_l4info;
370da14cebeSEric Cheng };
371da14cebeSEric Cheng 
372da14cebeSEric Cheng /*
373da14cebeSEric Cheng  * Flow ops vector.
374da14cebeSEric Cheng  * There are two groups of functions. The ones ending with _fe are
375da14cebeSEric Cheng  * called when a flow is being added. The others (hash, accept) are
376da14cebeSEric Cheng  * called at flow lookup time.
377da14cebeSEric Cheng  */
378da14cebeSEric Cheng #define	FLOW_MAX_ACCEPT	16
379da14cebeSEric Cheng typedef struct flow_ops_s {
380da14cebeSEric Cheng 	/*
381da14cebeSEric Cheng 	 * fo_accept_fe():
382da14cebeSEric Cheng 	 * Validates the contents of the flow and checks whether
383da14cebeSEric Cheng 	 * it's compatible with the flow table. sets the fe_match
384da14cebeSEric Cheng 	 * function of the flow.
385da14cebeSEric Cheng 	 */
386da14cebeSEric Cheng 	int		(*fo_accept_fe)(flow_tab_t *, flow_entry_t *);
387da14cebeSEric Cheng 	/*
388da14cebeSEric Cheng 	 * fo_hash_fe():
389da14cebeSEric Cheng 	 * Generates a hash index to the flow table. This function
390da14cebeSEric Cheng 	 * must use the same algorithm as fo_hash(), which is used
391da14cebeSEric Cheng 	 * by the flow lookup code path.
392da14cebeSEric Cheng 	 */
393da14cebeSEric Cheng 	uint32_t	(*fo_hash_fe)(flow_tab_t *, flow_entry_t *);
394da14cebeSEric Cheng 	/*
395da14cebeSEric Cheng 	 * fo_match_fe():
396da14cebeSEric Cheng 	 * This is used for finding identical flows.
397da14cebeSEric Cheng 	 */
398da14cebeSEric Cheng 	boolean_t	(*fo_match_fe)(flow_tab_t *, flow_entry_t *,
399da14cebeSEric Cheng 			    flow_entry_t *);
400da14cebeSEric Cheng 	/*
401da14cebeSEric Cheng 	 * fo_insert_fe():
402da14cebeSEric Cheng 	 * Used for inserting a flow to a flow chain.
403da14cebeSEric Cheng 	 * Protocols that have special ordering requirements would
404da14cebeSEric Cheng 	 * need to implement this. For those that don't,
405da14cebeSEric Cheng 	 * flow_generic_insert_fe() may be used.
406da14cebeSEric Cheng 	 */
407da14cebeSEric Cheng 	int		(*fo_insert_fe)(flow_tab_t *, flow_entry_t **,
408da14cebeSEric Cheng 			    flow_entry_t *);
409da14cebeSEric Cheng 
410da14cebeSEric Cheng 	/*
411da14cebeSEric Cheng 	 * Calculates the flow hash index based on the accumulated
412da14cebeSEric Cheng 	 * state in flow_state_t. Must use the same algorithm as
413da14cebeSEric Cheng 	 * fo_hash_fe().
414da14cebeSEric Cheng 	 */
415da14cebeSEric Cheng 	uint32_t	(*fo_hash)(flow_tab_t *, flow_state_t *);
416da14cebeSEric Cheng 
417da14cebeSEric Cheng 	/*
418da14cebeSEric Cheng 	 * Array of accept fuctions.
419da14cebeSEric Cheng 	 * Each function in the array will accumulate enough state
420da14cebeSEric Cheng 	 * (header length, protocol) to allow the next function to
421da14cebeSEric Cheng 	 * proceed. We support up to FLOW_MAX_ACCEPT functions which
422da14cebeSEric Cheng 	 * should be sufficient for all practical purposes.
423da14cebeSEric Cheng 	 */
424da14cebeSEric Cheng 	int		(*fo_accept[FLOW_MAX_ACCEPT])(flow_tab_t *,
425da14cebeSEric Cheng 			    flow_state_t *);
426da14cebeSEric Cheng } flow_ops_t;
427da14cebeSEric Cheng 
428da14cebeSEric Cheng /*
429da14cebeSEric Cheng  * Generic flow table.
430da14cebeSEric Cheng  */
431da14cebeSEric Cheng struct flow_tab_s {
432da14cebeSEric Cheng 	krwlock_t		ft_lock;
433da14cebeSEric Cheng 	/*
434da14cebeSEric Cheng 	 * Contains a list of functions (described above)
435da14cebeSEric Cheng 	 * specific to this table type.
436da14cebeSEric Cheng 	 */
437da14cebeSEric Cheng 	flow_ops_t		ft_ops;
438da14cebeSEric Cheng 
439da14cebeSEric Cheng 	/*
440da14cebeSEric Cheng 	 * Indicates what types of flows are supported.
441da14cebeSEric Cheng 	 */
442da14cebeSEric Cheng 	flow_mask_t		ft_mask;
443da14cebeSEric Cheng 
444da14cebeSEric Cheng 	/*
445da14cebeSEric Cheng 	 * An array of flow_entry_t * of size ft_size.
446da14cebeSEric Cheng 	 * Each element is the beginning of a hash chain.
447da14cebeSEric Cheng 	 */
448da14cebeSEric Cheng 	flow_entry_t		**ft_table;
449da14cebeSEric Cheng 	uint_t			ft_size;
450da14cebeSEric Cheng 
451da14cebeSEric Cheng 	/*
452da14cebeSEric Cheng 	 * The number of flows inserted into ft_table.
453da14cebeSEric Cheng 	 */
454da14cebeSEric Cheng 	uint_t			ft_flow_count;
455da14cebeSEric Cheng 	struct mac_impl_s	*ft_mip;
456da14cebeSEric Cheng 	struct mac_client_impl_s	*ft_mcip;
457da14cebeSEric Cheng };
458da14cebeSEric Cheng 
459da14cebeSEric Cheng /*
460da14cebeSEric Cheng  * This is used for describing what type of flow table can be created.
461da14cebeSEric Cheng  * mac_flow.c contains a list of these structures.
462da14cebeSEric Cheng  */
463da14cebeSEric Cheng typedef struct flow_tab_info_s {
464da14cebeSEric Cheng 	flow_ops_t		*fti_ops;
465da14cebeSEric Cheng 	flow_mask_t		fti_mask;
466da14cebeSEric Cheng 	uint_t			fti_size;
467da14cebeSEric Cheng } flow_tab_info_t;
468da14cebeSEric Cheng 
/* True when the table pointer is NULL or the table holds no flows. */
#define	FLOW_TAB_EMPTY(ft)	((ft) == NULL || (ft)->ft_flow_count == 0)
470da14cebeSEric Cheng 
471da14cebeSEric Cheng 
/*
 * Add c to the mms_<s> counter of the mac client m (cast to
 * mac_client_impl_t *). The update is a plain, non-atomic += with no
 * locking performed by the macro itself.
 */
#define	MCIP_STAT_UPDATE(m, s, c) {					\
	((mac_client_impl_t *)(m))->mci_misc_stat.mms_##s		\
	+= ((uint64_t)(c));						\
}
476da14cebeSEric Cheng 
/*
 * Add c to the Rx mrs_<s> counter of the soft ring set m (cast to
 * mac_soft_ring_set_t *). Plain, non-atomic +=.
 */
#define	SRS_RX_STAT_UPDATE(m, s, c) {					\
	((mac_soft_ring_set_t *)(m))->srs_rx.sr_stat.mrs_##s		\
	+= ((uint64_t)(c));						\
}
481*0dc2366fSVenugopal Iyer 
/*
 * Add c to the Tx mts_<s> counter of the soft ring set m (cast to
 * mac_soft_ring_set_t *). Plain, non-atomic +=.
 */
#define	SRS_TX_STAT_UPDATE(m, s, c) {					\
	((mac_soft_ring_set_t *)(m))->srs_tx.st_stat.mts_##s		\
	+= ((uint64_t)(c));						\
}

/*
 * Fold the opackets, obytes and oerrors counters of the stats structure
 * pointed to by s into the Tx counters of the soft ring set m.
 */
#define	SRS_TX_STATS_UPDATE(m, s) {					\
	SRS_TX_STAT_UPDATE((m), opackets, (s)->mts_opackets);		\
	SRS_TX_STAT_UPDATE((m), obytes, (s)->mts_obytes);		\
	SRS_TX_STAT_UPDATE((m), oerrors, (s)->mts_oerrors);		\
}
492*0dc2366fSVenugopal Iyer 
/*
 * Add c to the Tx mts_<s> counter of the soft ring m (cast to
 * mac_soft_ring_t *). Plain, non-atomic +=.
 */
#define	SOFTRING_TX_STAT_UPDATE(m, s, c) {				\
	((mac_soft_ring_t *)(m))->s_st_stat.mts_##s += ((uint64_t)(c));	\
}

/*
 * Fold the opackets, obytes and oerrors counters of the stats structure
 * pointed to by s into the Tx counters of the soft ring m.
 */
#define	SOFTRING_TX_STATS_UPDATE(m, s) {				\
	SOFTRING_TX_STAT_UPDATE((m), opackets, (s)->mts_opackets);	\
	SOFTRING_TX_STAT_UPDATE((m), obytes, (s)->mts_obytes);		\
	SOFTRING_TX_STAT_UPDATE((m), oerrors, (s)->mts_oerrors);	\
}
502da14cebeSEric Cheng 
503da14cebeSEric Cheng extern void	mac_flow_init();
504da14cebeSEric Cheng extern void	mac_flow_fini();
505da14cebeSEric Cheng extern int	mac_flow_create(flow_desc_t *, mac_resource_props_t *,
506da14cebeSEric Cheng 		    char *, void *, uint_t, flow_entry_t **);
507da14cebeSEric Cheng 
508da14cebeSEric Cheng extern int	mac_flow_add(flow_tab_t *, flow_entry_t *);
509da14cebeSEric Cheng extern int	mac_flow_add_subflow(mac_client_handle_t, flow_entry_t *,
510da14cebeSEric Cheng 		    boolean_t);
511da14cebeSEric Cheng extern int	mac_flow_hash_add(flow_entry_t *);
512da14cebeSEric Cheng extern int	mac_flow_lookup_byname(char *, flow_entry_t **);
513da14cebeSEric Cheng extern int	mac_flow_lookup(flow_tab_t *, mblk_t *, uint_t,
514da14cebeSEric Cheng 		    flow_entry_t **);
515da14cebeSEric Cheng 
516da14cebeSEric Cheng extern int	mac_flow_walk(flow_tab_t *, int (*)(flow_entry_t *, void *),
517da14cebeSEric Cheng 		    void *);
518da14cebeSEric Cheng 
519da14cebeSEric Cheng extern int	mac_flow_walk_nolock(flow_tab_t *,
520da14cebeSEric Cheng 		    int (*)(flow_entry_t *, void *), void *);
521da14cebeSEric Cheng 
522da14cebeSEric Cheng extern void	mac_flow_modify(flow_tab_t *, flow_entry_t *,
523da14cebeSEric Cheng 		    mac_resource_props_t *);
524da14cebeSEric Cheng 
525da14cebeSEric Cheng extern void	*mac_flow_get_client_cookie(flow_entry_t *);
526da14cebeSEric Cheng 
527da14cebeSEric Cheng extern uint32_t	mac_flow_modify_props(flow_entry_t *, mac_resource_props_t *);
528da14cebeSEric Cheng 
529da14cebeSEric Cheng extern int	mac_flow_update(flow_tab_t *, flow_entry_t *, flow_desc_t *);
530da14cebeSEric Cheng extern void	mac_flow_get_desc(flow_entry_t *, flow_desc_t *);
531da14cebeSEric Cheng extern void	mac_flow_set_desc(flow_entry_t *, flow_desc_t *);
532da14cebeSEric Cheng 
533da14cebeSEric Cheng extern void	mac_flow_remove(flow_tab_t *, flow_entry_t *, boolean_t);
534da14cebeSEric Cheng extern void	mac_flow_hash_remove(flow_entry_t *);
535da14cebeSEric Cheng extern void	mac_flow_wait(flow_entry_t *, mac_flow_state_t);
536da14cebeSEric Cheng extern void	mac_flow_quiesce(flow_entry_t *);
537da14cebeSEric Cheng extern void	mac_flow_restart(flow_entry_t *);
538da14cebeSEric Cheng extern void	mac_flow_cleanup(flow_entry_t *);
539da14cebeSEric Cheng extern void	mac_flow_destroy(flow_entry_t *);
540da14cebeSEric Cheng 
541da14cebeSEric Cheng extern void	mac_flow_tab_create(flow_ops_t *, flow_mask_t, uint_t,
542da14cebeSEric Cheng 		    struct mac_impl_s *, flow_tab_t **);
543da14cebeSEric Cheng extern void	mac_flow_l2tab_create(struct mac_impl_s *, flow_tab_t **);
544da14cebeSEric Cheng extern void	mac_flow_tab_destroy(flow_tab_t *);
545da14cebeSEric Cheng extern void	mac_flow_drop(void *, void *, mblk_t *);
546da14cebeSEric Cheng extern void	flow_stat_destroy(flow_entry_t *);
547da14cebeSEric Cheng 
548da14cebeSEric Cheng #ifdef	__cplusplus
549da14cebeSEric Cheng }
550da14cebeSEric Cheng #endif
551da14cebeSEric Cheng 
552da14cebeSEric Cheng #endif	/* _MAC_FLOW_IMPL_H */
553