xref: /illumos-gate/usr/src/uts/sun4v/sys/vnet_gen.h (revision e6f8def1ace27f327240a0b4b090911007f71137)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #ifndef _VNET_GEN_H
28 #define	_VNET_GEN_H
29 
30 #ifdef __cplusplus
31 extern "C" {
32 #endif
33 
34 #include <sys/vgen_stats.h>
35 
/* Return codes */
#define	VGEN_SUCCESS		0	/* call succeeded */
#define	VGEN_FAILURE		(-1)	/* call failed */

/* Maximum number of vgen versions (sizes vgen_ldc_t.vgen_versions[]) */
#define	VGEN_NUM_VER		1

/* ldc end-point identifiers */
#define	VGEN_LOCAL		1	/* local end-point */
#define	VGEN_PEER		2	/* peer end-point */

/* vgen_t flags */
#define	VGEN_STOPPED		0x0
#define	VGEN_STARTED		0x1

/* Free an object whose size is that of the type the pointer refers to */
#define	KMEM_FREE(ptr)		kmem_free((ptr), sizeof (*(ptr)))

/* Initial size of the multicast table */
#define	VGEN_INIT_MCTAB_SIZE	16
51 
/* Shorthand for taking and dropping a reader/writer lock */
#define	READ_ENTER(lockp)	rw_enter((lockp), RW_READER)
#define	WRITE_ENTER(lockp)	rw_enter((lockp), RW_WRITER)
#define	RW_EXIT(lockp)		rw_exit((lockp))

/* Channel flags */
#define	CHANNEL_ATTACHED	0x1
#define	CHANNEL_STARTED		0x2

/* Values returned from the transmit path */
#define	VGEN_TX_SUCCESS		0	/* frame was transmitted */
#define	VGEN_TX_FAILURE		1	/* transmit failed */
#define	VGEN_TX_NORESOURCES	2	/* ran out of tbufs/txds */

/* Private descriptor flags */
#define	VGEN_PRIV_DESC_FREE	0x0	/* descriptor is available */
#define	VGEN_PRIV_DESC_BUSY	0x1	/* descriptor is in use */

/* Walk from a channel to its owning vgen / vnet instance via its port */
#define	LDC_TO_VGEN(ldcp)	((ldcp)->portp->vgenp)
#define	LDC_TO_VNET(ldcp)	((ldcp)->portp->vgenp->vnetp)
71 
/* Worker (receive) thread flags */
#define	VGEN_WTHR_DATARCVD	0x01	/* data has been received */
#define	VGEN_WTHR_STOP		0x02	/* worker thread asked to stop */
#define	VGEN_WTHR_PROCESSING	0x04	/* worker thread awake, processing */

/* ldc tunables */
#define	VGEN_LDC_MTU		64	/* ldc pkt transfer mtu */
#define	VGEN_LDC_UP_DELAY	100	/* usec between ldc_up retries */
#define	VGEN_LDC_CLOSE_DELAY	100	/* usec between ldc_cl retries */
#define	VGEN_LDC_UNINIT_DELAY	100	/* usec between uninit retries */

/* Transmit watchdog */
#define	VGEN_TXWD_INTERVAL	1000	/* watchdog frequency, msec */
#define	VGEN_TXWD_TIMEOUT	1000	/* watchdog timeout, msec */

/* Number of vio mblk pools */
#define	VGEN_NUM_VMPOOLS	3

/* Receive data buffer sizes and count */
#define	VGEN_DBLK_SZ_128	128	/* 128 byte data buffer */
#define	VGEN_DBLK_SZ_256	256	/* 256 byte data buffer */
#define	VGEN_DBLK_SZ_2048	2048	/* 2K byte data buffer */
#define	VGEN_NRBUFS		512	/* number of receive bufs */

/* Tx data buffer size */
#define	VGEN_TXDBLK_SZ		2048

/* Minimum number of descriptors */
#define	VGEN_NUM_DESCRIPTORS_MIN	128
94 
95 static struct ether_addr etherbroadcastaddr = {
96 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
97 };
/*
 * MIB II broadcast/multicast packets
 *
 * Both macros take a pointer to an ethernet header (anything with an
 * ether_dhost member). IS_BROADCAST() compares the destination address
 * with etherbroadcastaddr; IS_MULTICAST() tests the low-order bit of the
 * first destination octet.
 *
 * The argument is parenthesized in the expansion so that expressions such
 * as 'hdrs + 1' or '&hdrs[1]' can be passed, not just a plain identifier.
 */
#define	IS_BROADCAST(ehp) \
		(ether_cmp(&(ehp)->ether_dhost, &etherbroadcastaddr) == 0)
#define	IS_MULTICAST(ehp) \
		(((ehp)->ether_dhost.ether_addr_octet[0] & 01) == 1)
105 
/*
 * Handshake phases.
 *
 * The handshake advances through the phases listed below. VH_PHASE0 is the
 * pre-handshake phase and VH_DONE indicates that all phases have completed
 * successfully. A phase may consist of one or several handshake states, all
 * of which must complete successfully before moving to the next phase. See
 * vgen_handshake() and vgen_handshake_done() for more details.
 */
enum {
	VH_PHASE0,
	VH_PHASE1,
	VH_PHASE2,
	VH_PHASE3,
	VH_PHASE4,
	VH_DONE = 0x80
};
116 
/*
 * Handshake states.
 *
 * One bit per event, in four groups (version, attributes, descriptor ring,
 * RDX). The last constant of each group is the combination of that group's
 * "ack received" and "ack sent" bits.
 */
enum {
	/* version */
	VER_INFO_SENT		= 1 << 0,
	VER_ACK_RCVD		= 1 << 1,
	VER_INFO_RCVD		= 1 << 2,
	VER_ACK_SENT		= 1 << 3,
	VER_NEGOTIATED		= VER_ACK_RCVD | VER_ACK_SENT,

	/* attributes */
	ATTR_INFO_SENT		= 1 << 4,
	ATTR_ACK_RCVD		= 1 << 5,
	ATTR_INFO_RCVD		= 1 << 6,
	ATTR_ACK_SENT		= 1 << 7,
	ATTR_INFO_EXCHANGED	= ATTR_ACK_RCVD | ATTR_ACK_SENT,

	/* descriptor ring */
	DRING_INFO_SENT		= 1 << 8,
	DRING_ACK_RCVD		= 1 << 9,
	DRING_INFO_RCVD		= 1 << 10,
	DRING_ACK_SENT		= 1 << 11,
	DRING_INFO_EXCHANGED	= DRING_ACK_RCVD | DRING_ACK_SENT,

	/* RDX */
	RDX_INFO_SENT		= 1 << 12,
	RDX_ACK_RCVD		= 1 << 13,
	RDX_INFO_RCVD		= 1 << 14,
	RDX_ACK_SENT		= 1 << 15,
	RDX_EXCHANGED		= RDX_ACK_RCVD | RDX_ACK_SENT
};
145 
/* Reset flags (one bit each) */
typedef enum {
	VGEN_FLAG_EVT_RESET	= 1 << 0,	/* channel reset event */
	VGEN_FLAG_NEED_LDCRESET	= 1 << 1,	/* channel reset is needed */
	VGEN_FLAG_UNINIT	= 1 << 2	/* channel is being torn down */
} vgen_reset_flags_t;
152 
/* Identifies the calling context, for code paths that need to know it */
typedef enum {
	VGEN_LDC_CB	= 1 << 0,	/* ldc callback handler */
	VGEN_MSG_THR	= 1 << 1,	/* vio message worker thread */
	VGEN_OTHER	= 1 << 2	/* any other thread, e.g. tx */
} vgen_caller_t;
159 
/*
 * Get the address of the tbuf that follows 'tbp' in the channel's tbuf
 * ring. When the following slot is the ring end ((ldcp)->tbufendp), wrap
 * to the start of the ring ((ldcp)->tbufp).
 *
 * The second parameter is deliberately not spelled 'tbufp'. A macro
 * parameter with that name would also replace the member name in
 * '(ldcp)->tbufp', so the macro would only work when the caller's argument
 * happened to be an identifier called 'tbufp'.
 *
 * Note that 'tbp' is evaluated more than once; do not pass an expression
 * with side effects.
 */
#define	NEXTTBUF(ldcp, tbp)	(((tbp) + 1) == (ldcp)->tbufendp    \
		? (ldcp)->tbufp : ((tbp) + 1))
163 
/*
 * Descriptor ring index helpers.
 *
 * The INCR_ and DECR_ forms step the index by one and wrap it by masking
 * with (ring size - 1); the masking only yields a correct wrap when the ring
 * size (num_rxds / num_txds) is a power of two. They assign to 'i', which
 * must therefore be an lvalue. The CHECK_ forms evaluate to non-zero when
 * the index lies within [0, ring size).
 */

/* step the receive index forward */
#define	INCR_RXI(i, ldcp)	\
		((i) = ((i) + 1) & ((ldcp)->num_rxds - 1))

/* step the receive index backward */
#define	DECR_RXI(i, ldcp)	\
		((i) = ((i) - 1) & ((ldcp)->num_rxds - 1))

/* step the transmit index forward */
#define	INCR_TXI(i, ldcp)	\
		((i) = ((i) + 1) & ((ldcp)->num_txds - 1))

/* step the transmit index backward */
#define	DECR_TXI(i, ldcp)	\
		((i) = ((i) - 1) & ((ldcp)->num_txds - 1))

/* is the receive index within the ring? */
#define	CHECK_RXI(i, ldcp)	\
		((i) >= 0 && (i) < (ldcp)->num_rxds)

/* is the transmit index within the ring? */
#define	CHECK_TXI(i, ldcp)	\
		((i) >= 0 && (i) < (ldcp)->num_txds)
187 
#ifdef DEBUG

/*
 * Error injection codes, available only when DEBUG is defined.
 * Each code occupies its own bit.
 */
#define	VGEN_ERR_HVER		0x01	/* handshake version */
#define	VGEN_ERR_HTIMEOUT	0x02	/* handshake timeout */
#define	VGEN_ERR_HSID		0x04	/* handshake session id */
#define	VGEN_ERR_HSTATE		0x08	/* handshake state */
#define	VGEN_ERR_TXTIMEOUT	0x10	/* tx timeout */
#define	VGEN_ERR_RXLOST		0x20	/* rx lost pkts */

#endif	/* DEBUG */
/*
 * Private descriptor.
 *
 * Driver-private companion of a public descriptor: it points at the
 * associated public descriptor and holds the preallocated transmit data
 * buffer together with the ldc memory handle and cookies for that data.
 * The flags field presumably takes the VGEN_PRIV_DESC_FREE / VGEN_PRIV_DESC_BUSY
 * values defined in this header (confirm against the implementation).
 */
typedef struct vgen_priv_desc {
	uint64_t		flags;		/* flag bits */
	vnet_public_desc_t	*descp;		/* associated public desc */
	ldc_mem_handle_t	memhandle;	/* mem handle for data */
	caddr_t			datap;		/* prealloc'd tx data buffer */
	uint64_t		datalen;	/* total actual datalen */
	uint64_t		ncookies;	/* num ldc_mem_cookies */
	ldc_mem_cookie_t	memcookie[MAX_COOKIES];	/* data cookies */
} vgen_private_desc_t;
209 
/*
 * Handshake parameters (per vio_mailbox.h) of each ldc end point, used
 * during handshake negotiation. A channel (vgen_ldc_t) keeps one instance
 * for the local end point and one for the peer. The fields are grouped by
 * the part of the handshake they belong to: version, attributes and
 * descriptor ring.
 */
typedef struct vgen_handshake_params {
	/* version specific params */
	uint16_t	ver_major;		/* major version number */
	uint16_t	ver_minor;		/* minor version number */
	uint8_t		dev_class;		/* device class */

	/* attributes specific params */
	uint64_t		mtu;		/* max transfer unit size */
	uint64_t		addr;		/* address of the device */
	uint8_t			addr_type;	/* type of address */
	uint8_t			xfer_mode;	/* SHM or PKT */
	uint16_t		ack_freq;	/* dring data ack freq */
	uint32_t		physlink_update; /* physlink updates */
	uint8_t			dring_mode;	/* Descriptor ring mode */

	/* descriptor ring params */
	uint32_t		num_desc;	/* # of descriptors in ring */
	uint32_t		desc_size;	/* size of descriptor */
	ldc_mem_cookie_t	dring_cookie;	/* desc ring cookie */
	uint32_t		dring_ncookies;	/* # of dring cookies */
	uint64_t		dring_ident;	/* ident=0 for INFO msg */
	boolean_t		dring_ready;	/* dring ready flag */
} vgen_hparams_t;
237 
/* A protocol version, expressed as a major/minor pair */
typedef struct vgen_ver {
	uint16_t	ver_major;	/* major part of the version */
	uint16_t	ver_minor;	/* minor part of the version */
} vgen_ver_t;
243 
244 /*
245  * vnet-protocol-version dependent function prototypes.
246  */
247 typedef int	(*vgen_ldctx_t) (void *, mblk_t *);
248 typedef void	(*vgen_ldcrx_pktdata_t) (void *, void *, uint32_t);
249 typedef int	(*vgen_ldcrx_dringdata_t) (void *, void *);
250 
/*
 * LDC end point abstraction in vnet. This structure holds all the information
 * that is required to configure and use the Channel for data transfers with
 * the peer LDC end point (vnet or vswitch), using VIO Protocol.
 *
 * The members are grouped as: locks, channel/handshake state, transmit
 * fields, receive fields and statistics. The transmit and receive groups are
 * each split into a part common to both modes and parts specific to the
 * TxDring and RxDringData modes.
 */
typedef struct vgen_ldc {

	struct vgen_port	*portp;		/* associated port */

	/*
	 * Locks:
	 * locking hierarchy when more than one lock is held concurrently:
	 * cblock > rxlock > txlock > tclock.
	 *
	 * NOTE(review): wrlock, pollq_lock and msg_thr_lock are not placed in
	 * the hierarchy above; confirm their ordering against the
	 * implementation.
	 */
	kmutex_t		cblock;		/* sync callback processing */
	kmutex_t		txlock;		/* protect txd alloc */
	kmutex_t		tclock;		/* tx reclaim lock */
	kmutex_t		wrlock;		/* sync transmits */
	kmutex_t		rxlock;		/* sync reception */
	kmutex_t		pollq_lock;	/* sync polling and rxworker */

	/*
	 * Channel and Handshake Info
	 */
	uint64_t		ldc_id;		/* channel number */
	uint64_t		ldc_handle;	/* channel handle */
	ldc_status_t		ldc_status;	/* channel status */
	vgen_ver_t		vgen_versions[VGEN_NUM_VER]; /* versions */
	int			hphase;		/* handshake phase */
	int			hstate;		/* handshake state bits */
	link_state_t		link_state;	/* channel link state */
#ifdef	VNET_IOC_DEBUG
	boolean_t		link_down_forced; /* forced link down */
#endif
	uint32_t		local_sid;	/* local session id */
	uint32_t		peer_sid;	/* session id of peer */
	vgen_hparams_t		local_hparams;	/* local handshake params */
	vgen_hparams_t		peer_hparams;	/* peer's handshake params */
	timeout_id_t		htid;		/* handshake wd timeout id */
	timeout_id_t		cancel_htid;	/* cancel handshake watchdog */
	uint8_t			dring_mtype;	/* dring mem map type */
	uint64_t		*ldcmsg;	/* msg buffer for ldc_read() */
	uint64_t		msglen;		/* size of ldcmsg */
	uint32_t		flags;		/* flags */
	uint_t			reset_in_progress; /* channel being reset */
	uint32_t		hretries;	/* handshake retry count */
	uint32_t		ldc_reset_count; /* # of channel resets */

	/*
	 * Transmit Specific Fields
	 */
	/* TX-Common (Used in both TxDring and RxDringData modes) */
	/*
	 * The INCR_TXI/DECR_TXI macros wrap an index by masking with
	 * (num_txds - 1), which requires num_txds to be a power of two.
	 */
	uint32_t		num_txds;	   /* # of descriptors */
	uint32_t		tx_dring_ncookies; /* # of dring cookies */
	ldc_dring_handle_t	tx_dring_handle;   /* dring handle */
	ldc_mem_cookie_t	tx_dring_cookie;   /* dring cookie */
	uint32_t		next_txi;	   /* free descriptor index */
	caddr_t			tx_datap;	   /* tx data area */
	size_t			tx_data_sz;	   /* size of data area */
	size_t			tx_dblk_sz;	   /* size of data blk */
	timeout_id_t		wd_tid;		   /* watchdog timeout id */
	boolean_t		tx_blocked;	   /* flow controlled */
	clock_t			tx_blocked_lbolt;  /* flow controlled time */
	boolean_t		resched_peer;	   /* restart peer needed */
	uint32_t		resched_peer_txi;  /* index to resched peer */
	vgen_ldctx_t		tx;		   /* transmit function */
	vgen_ldctx_t		tx_dringdata;	   /* dring transmit function */

	/* TX-TxDring mode */
	/*
	 * tbufp and tbufendp delimit the tbuf ring; NEXTTBUF() wraps from
	 * tbufendp back to tbufp.
	 */
	vnet_public_desc_t	*txdp;		/* exported dring */
	vgen_private_desc_t	*tbufp;		/* dring associated resources */
	vgen_private_desc_t	*tbufendp;	/* tbuf ring end */
	vgen_private_desc_t	*next_tbufp;	/* free tbuf */
	vgen_private_desc_t	*cur_tbufp;	/* reclaim tbuf */
	uint32_t		cur_txi;	/* reclaim descriptor index */
	uint64_t		next_txseq;	/* msg seqnum */
	clock_t			reclaim_lbolt;	/* time of last reclaim */

	/* TX-RxDringData mode */
	uint32_t		tx_data_ncookies; /* # of data cookies */
	ldc_mem_handle_t	tx_data_handle;	  /* mapped data handle */
	ldc_mem_cookie_t	*tx_data_cookie;  /* mapped data cookies */
	vnet_rx_dringdata_desc_t *mtxdp;	  /* mapped dring */
	uint32_t		dringdata_msgid;  /* msg id */

	/*
	 * Receive Specific Fields
	 */
	/* RX-Common (Used in both TxDring and RxDringData modes) */
	/*
	 * The INCR_RXI/DECR_RXI macros wrap an index by masking with
	 * (num_rxds - 1), which requires num_rxds to be a power of two.
	 */
	uint32_t		num_rxds;	   /* # of descriptors */
	uint32_t		rx_dring_ncookies; /* # of dring cookies */
	ldc_dring_handle_t	rx_dring_handle;   /* dring handle */
	ldc_mem_cookie_t	rx_dring_cookie;   /* dring cookie */
	uint32_t		next_rxi;	   /* free descriptor index */
	vgen_ldcrx_dringdata_t	rx_dringdata;	   /* dring rcv function */
	vgen_ldcrx_pktdata_t	rx_pktdata;	   /* raw data rcv function */
	boolean_t		polling_on;	   /* polling enabled ? */

	/* RX-TxDring mode */
	vnet_public_desc_t	*mrxdp;		 /* mapped dring */
	uint64_t		next_rxseq;	 /* msg seqnum */
	vio_multi_pool_t	vmp;		 /* mblk pools */
	uint32_t		max_rxpool_size; /* max size of rxpool in use */
	mblk_t			*pollq_headp;	 /* head of pkts in pollq */
	mblk_t			*pollq_tailp;	 /* tail of pkts in pollq */
	kthread_t		*msg_thread;	 /* message thread */
	uint32_t		msg_thr_flags;	 /* message thread flags */
	kmutex_t		msg_thr_lock;	 /* lock for message thread */
	kcondvar_t		msg_thr_cv;	 /* cond.var for msg thread */

	/* RX-RxDringData mode */
	uint32_t		num_rbufs;	  /* # of data bufs */
	uint32_t		rx_data_ncookies; /* # of data cookies */
	ldc_mem_handle_t	rx_data_handle;	  /* exported data handle */
	ldc_mem_cookie_t	*rx_data_cookie;  /* exported data cookies */
	vnet_rx_dringdata_desc_t *rxdp;		  /* exported dring */
	vio_mblk_pool_t		*rx_vmp;	  /* mblk pool */
	vio_mblk_t		**rxdp_to_vmp;	  /* descr to buf map tbl */
	caddr_t			rx_datap;	  /* mapped rx data area */
	size_t			rx_data_sz;	  /* size of mapped rx data */
	size_t			rx_dblk_sz;	  /* size of each rx data blk */
	mblk_t			*rx_pri_head;	  /* priority pkts head */
	mblk_t			*rx_pri_tail;	  /* priority pkts tail */

	/* Channel Statistics */
	vgen_stats_t		stats;		/* channel statistics */
	kstat_t			*ksp;		/* channel kstats */
} vgen_ldc_t;
379 
/*
 * Port information structure.
 *
 * Ports are chained through nextp (see vgen_portlist_t) and each refers back
 * to its vgen_t. A port records its channels, the peer's mac address, its
 * vlan configuration and the callbacks/handle exchanged with vnet.
 */
typedef struct vgen_port {
	struct vgen_port	*nextp;		/* next port in the list */
	struct vgen		*vgenp;		/* associated vgen_t */
	int			port_num;	/* port number */
	boolean_t		is_vsw_port;	/* connected to vswitch ? */
	int			num_ldcs;	/* # of channels in this port */
	uint64_t		*ldc_ids;	/* channel ids */
	vgen_ldc_t		*ldcp;		/* list of ldcs for this port */
	ether_addr_t		macaddr;	/* mac address of peer */
	uint16_t		pvid;		/* port vlan id (untagged) */
	uint16_t		*vids;		/* vlan ids (tagged) */
	uint16_t		nvids;		/* # of vids */
	mod_hash_t		*vlan_hashp;	/* vlan hash table */
	uint32_t		vlan_nchains;	/* # of vlan hash chains */
	uint32_t		use_vsw_port;	/* Use vsw_port or not */
	uint32_t		flags;		/* status of this port */
	vio_net_callbacks_t	vcb;		/* vnet callbacks */
	vio_net_handle_t	vhp;		/* handle from vnet */
	kmutex_t		lock;		/* synchronize ops */
} vgen_port_t;
401 
/*
 * Port list structure: head and tail of the list of ports (linked through
 * vgen_port_t.nextp) and the reader/writer lock that synchronizes access to
 * the list.
 */
typedef struct vgen_portlist {
	vgen_port_t	*headp;		/* head of ports */
	vgen_port_t	*tailp;		/* tail */
	krwlock_t	rwlock;		/* sync access to the port list */
} vgen_portlist_t;
408 
/*
 * vgen instance information.
 *
 * One of these is associated with a vnet instance (vnetp). It holds the
 * port list, the mdeg registration handles, the multicast address table,
 * the priority ethernet type configuration and the physical link state.
 */
typedef struct vgen {
	vnet_t			*vnetp;		/* associated vnet instance */
	int			instance;	/* vnet instance */
	dev_info_t		*vnetdip;	/* dip of vnet */
	uint64_t		regprop;	/* "reg" property */
	ether_addr_t		macaddr;	/* mac addr of vnet */
	kmutex_t		lock;		/* synchronize ops */
	int			flags;		/* flags (VGEN_STOPPED/STARTED) */
	vgen_portlist_t		vgenports;	/* Port List */
	mdeg_node_spec_t	*mdeg_parentp;	/* mdeg parent node spec */
	mdeg_handle_t		mdeg_dev_hdl;	/* mdeg cb handle for device */
	mdeg_handle_t		mdeg_port_hdl;	/* mdeg cb handle for port */
	vgen_port_t		*vsw_portp;	/* port connected to vsw */
	struct ether_addr	*mctab;		/* multicast addr table */
	uint32_t		mcsize;		/* allocated size of mctab */
	uint32_t		mccount;	/* # of valid addrs in mctab */
	ddi_taskq_t		*rxp_taskq;	/* VIO rx pool taskq */
	uint32_t		pri_num_types;	/* # of priority eth types */
	uint16_t		*pri_types;	/* priority eth types */
	vio_mblk_pool_t		*pri_tx_vmp;	/* tx priority mblk pool */
	uint32_t		max_frame_size;	/* max frame size supported */

	uint32_t		vsw_port_refcnt; /* refcnt for vsw_port */
	boolean_t		pls_negotiated;	/* phys link state update ? */
	link_state_t		phys_link_state; /* physical link state */
} vgen_t;
436 
437 #ifdef __cplusplus
438 }
439 #endif
440 
441 #endif	/* _VNET_GEN_H */
442