xref: /illumos-gate/usr/src/uts/sun4v/sys/vsw_ldc.h (revision fa94a07fd0519b8abfd871ad8fe60e6bebe1e2bb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * This header file contains the basic data structures which the
29  * virtual switch (vsw) uses to communicate with vnet clients.
30  *
31  * The virtual switch reads the machine description (MD) to
32  * determine how many port_t structures to create (each port_t
33  * can support communications to a single network device). The
34  * port_t's are maintained in a linked list.
35  *
36  * Each port in turn contains a number of logical domain channels
37  * (ldc's) which are inter domain communications channels which
38  * are used for passing small messages between the domains. There
39  * may be an unlimited number of channels associated with each port,
40  * though most devices only use a single channel.
41  *
42  * The ldc is a bi-directional channel, which is divided up into
43  * two directional 'lanes', one outbound from the switch to the
44  * virtual network device, the other inbound to the switch.
45  * Depending on the type of device each lane may have separate
46  * communication parameters (such as mtu etc).
47  *
48  * For those network clients which use descriptor rings the
49  * rings are associated with the appropriate lane. I.e. rings
50  * which the switch exports are associated with the outbound lanes
51  * while those which the network clients are exporting to the switch
52  * are associated with the inbound lane.
53  *
54  * In diagram form the data structures look as follows:
55  *
56  * vsw instance
57  *     |
58  *     +----->port_t----->port_t----->port_t----->
59  *		|
60  *		+--->ldc_t--->ldc_t--->ldc_t--->
61  *		       |
62  *		       +--->lane_t (inbound)
63  *		       |       |
64  *		       |       +--->dring--->dring--->
65  *		       |
66  *		       +--->lane_t (outbound)
67  *			       |
68  *			       +--->dring--->dring--->
69  *
70  */
71 
72 #ifndef	_VSW_LDC_H
73 #define	_VSW_LDC_H
74 
75 #pragma ident	"%Z%%M%	%I%	%E% SMI"
76 
77 #ifdef	__cplusplus
78 extern "C" {
79 #endif
80 
/*
 * Default message type: a block of eight 64-bit words (64 bytes).
 * VSW_LDC_MTU is defined as the size of this structure.
 */
typedef struct def_msg {
	uint64_t	data[8];	/* raw message words */
} def_msg_t;
87 
/*
 * Only a single major/minor version pair is supported at present.
 */
#define	VSW_NUM_VER	1

/*
 * A supported version: 16-bit major and 16-bit minor number packed
 * into one 32-bit word.
 */
typedef struct ver_sup {
	uint32_t	ver_major:16;
	uint32_t	ver_minor:16;
} ver_sup_t;
97 
/*
 * Lane states.
 *
 * Every state other than VSW_LANE_INACTIV is a distinct single bit, so
 * several states can be recorded at once in one lane state word.
 */
#define	VSW_LANE_INACTIV	0x00000000	/* No params set for lane */

/* Version exchange */
#define	VSW_VER_INFO_SENT	0x00000001	/* Version # sent to peer */
#define	VSW_VER_INFO_RECV	0x00000002	/* Version # recv from peer */
#define	VSW_VER_ACK_RECV	0x00000004
#define	VSW_VER_ACK_SENT	0x00000008
#define	VSW_VER_NACK_RECV	0x00000010
#define	VSW_VER_NACK_SENT	0x00000020

/* Attribute exchange */
#define	VSW_ATTR_INFO_SENT	0x00000040	/* Attributes sent to peer */
#define	VSW_ATTR_INFO_RECV	0x00000080	/* Peer attributes received */
#define	VSW_ATTR_ACK_SENT	0x00000100
#define	VSW_ATTR_ACK_RECV	0x00000200
#define	VSW_ATTR_NACK_SENT	0x00000400
#define	VSW_ATTR_NACK_RECV	0x00000800

/* Descriptor ring (dring) registration */
#define	VSW_DRING_INFO_SENT	0x00001000	/* Dring info sent to peer */
#define	VSW_DRING_INFO_RECV	0x00002000	/* Dring info received */
#define	VSW_DRING_ACK_SENT	0x00004000
#define	VSW_DRING_ACK_RECV	0x00008000
#define	VSW_DRING_NACK_SENT	0x00010000
#define	VSW_DRING_NACK_RECV	0x00020000

/* RDX exchange */
#define	VSW_RDX_INFO_SENT	0x00040000	/* RDX sent to peer */
#define	VSW_RDX_INFO_RECV	0x00080000	/* RDX received from peer */
#define	VSW_RDX_ACK_SENT	0x00100000
#define	VSW_RDX_ACK_RECV	0x00200000
#define	VSW_RDX_NACK_SENT	0x00400000
#define	VSW_RDX_NACK_RECV	0x00800000

/* Multicast (MCST) messages */
#define	VSW_MCST_INFO_SENT	0x01000000
#define	VSW_MCST_INFO_RECV	0x02000000
#define	VSW_MCST_ACK_SENT	0x04000000
#define	VSW_MCST_ACK_RECV	0x08000000
#define	VSW_MCST_NACK_SENT	0x10000000
#define	VSW_MCST_NACK_RECV	0x20000000

#define	VSW_LANE_ACTIVE		0x40000000	/* Lane open to xmit data */
139 
/*
 * Handshake milestones. Each milestone is a distinct bit.
 */
#define	VSW_MILESTONE0		0x01	/* ver info exchanged */
#define	VSW_MILESTONE1		0x02	/* attribute exchanged */
#define	VSW_MILESTONE2		0x04	/* dring info exchanged */
#define	VSW_MILESTONE3		0x08	/* rdx exchanged */
#define	VSW_MILESTONE4		0x10	/* handshake complete */
146 
/*
 * Direction of a lane, as seen from the switch itself.
 */
#define	INBOUND			0x01
#define	OUTBOUND		0x02

/*
 * Session status flag: peer session id received.
 */
#define	VSW_PEER_SESSION	0x01
155 
/*
 * Upper bound on the number of consecutive reads of data from a channel.
 */
#define	VSW_MAX_CHAN_READ	50

/*
 * Only one ldc per port is supported at present.
 */
#define	VSW_PORT_MAX_LDCS	1	/* max # of ldcs per port */

/*
 * Flag used when ports are added or deleted.
 */
#define	VSW_PORT_UPDATED	0x01

/*
 * Result codes for an ldc transmit.
 */
#define	LDC_TX_SUCCESS		0	/* ldc transmit success */
#define	LDC_TX_FAILURE		1	/* ldc transmit failure */
#define	LDC_TX_NORESOURCES	2	/* out of descriptors */
174 
/*
 * Descriptor ring info
 *
 * Each descriptor element has a pre-allocated data buffer
 * associated with it, into which data being transmitted is
 * copied. By pre-allocating we speed up the copying process.
 * The buffer is re-used once the peer has indicated that it is
 * finished with the descriptor.
 *
 * The sizeof-based macros are fully parenthesized so that they behave
 * as a single operand wherever they are expanded.
 *
 * NOTE(review): VSW_PRIV_SIZE measures vnet_private_desc_t, which is not
 * declared in this header (this header declares vsw_private_desc_t).
 * Confirm that is the intended type.
 */
#define	VSW_RING_NUM_EL		512	/* Num of entries in ring */
#define	VSW_RING_EL_DATA_SZ	2048	/* Size of data section (bytes) */
#define	VSW_PRIV_SIZE	(sizeof (vnet_private_desc_t))
#define	VSW_PUB_SIZE	(sizeof (vnet_public_desc_t))

/* Cookies per buffer: pages spanned by ETHERMTU bytes, plus two. */
#define	VSW_MAX_COOKIES		((ETHERMTU >> MMU_PAGESHIFT) + 2)
190 
/*
 * LDC pkt transfer MTU: the size of one default message (def_msg_t).
 * Parenthesized so the expansion is always a single operand.
 */
#define	VSW_LDC_MTU	(sizeof (def_msg_t))
195 
/*
 * mblk pools: the size of the mblks held by each pool ...
 */
#define	VSW_MBLK_SZ_128		128
#define	VSW_MBLK_SZ_256		256
#define	VSW_MBLK_SZ_2048	2048

/*
 * ... and the number of mblks in each pool.
 */
#define	VSW_NUM_MBLKS	1024
207 
208 /*
209  * Private descriptor
210  */
211 typedef struct vsw_private_desc {
212 	/*
213 	 * Below lock must be held when accessing the state of
214 	 * a descriptor on either the private or public sections
215 	 * of the ring.
216 	 */
217 	kmutex_t		dstate_lock;
218 	uint64_t		dstate;
219 	vnet_public_desc_t	*descp;
220 	ldc_mem_handle_t	memhandle;
221 	void			*datap;
222 	uint64_t		datalen;
223 	uint64_t		ncookies;
224 	ldc_mem_cookie_t	memcookie[VSW_MAX_COOKIES];
225 	int			bound;
226 } vsw_private_desc_t;
227 
228 /*
229  * Descriptor ring structure
230  */
231 typedef struct dring_info {
232 	struct	dring_info	*next;	/* next ring in chain */
233 	kmutex_t		dlock;
234 	uint32_t		num_descriptors;
235 	uint32_t		descriptor_size;
236 	uint32_t		options;
237 	uint32_t		ncookies;
238 	ldc_mem_cookie_t	cookie[1];
239 
240 	ldc_dring_handle_t	handle;
241 	uint64_t		ident;	/* identifier sent to peer */
242 	uint64_t		end_idx;	/* last idx processed */
243 	int64_t			last_ack_recv;
244 
245 	kmutex_t		restart_lock;
246 	boolean_t		restart_reqd;	/* send restart msg */
247 
248 	/*
249 	 * base address of private and public portions of the
250 	 * ring (where appropriate), and data block.
251 	 */
252 	void			*pub_addr;	/* base of public section */
253 	void			*priv_addr;	/* base of private section */
254 	void			*data_addr;	/* base of data section */
255 	size_t			data_sz;	/* size of data section */
256 } dring_info_t;
257 
258 /*
259  * Each ldc connection is comprised of two lanes, incoming
260  * from a peer, and outgoing to that peer. Each lane shares
261  * common ldc parameters and also has private lane-specific
262  * parameters.
263  */
264 typedef struct lane {
265 	uint64_t	lstate;		/* Lane state */
266 	uint32_t	ver_major:16,	/* Version major number */
267 			ver_minor:16;	/* Version minor number */
268 	uint64_t	seq_num;	/* Sequence number */
269 	uint64_t	mtu;		/* ETHERMTU */
270 	uint64_t	addr;		/* Unique physical address */
271 	uint8_t		addr_type;	/* Only MAC address at moment */
272 	uint8_t		xfer_mode;	/* Dring or Pkt based */
273 	uint8_t		ack_freq;	/* Only non zero for Pkt based xfer */
274 	krwlock_t	dlistrw;	/* Lock for dring list */
275 	dring_info_t	*dringp;	/* List of drings for this lane */
276 } lane_t;
277 
/*
 * Channel drain states.
 */
#define	VSW_LDC_INIT		0x01	/* Initial non-drain state */
#define	VSW_LDC_DRAINING	0x02	/* Channel draining */
281 
282 /* ldc information associated with a vsw-port */
283 typedef struct vsw_ldc {
284 	struct vsw_ldc		*ldc_next;	/* next ldc in the list */
285 	struct vsw_port		*ldc_port;	/* associated port */
286 	struct vsw		*ldc_vswp;	/* associated vsw */
287 	kmutex_t		ldc_cblock;	/* sync callback processing */
288 	kmutex_t		ldc_txlock;	/* sync transmits */
289 	kmutex_t		ldc_rxlock;	/* sync rx */
290 	uint64_t		ldc_id;		/* channel number */
291 	ldc_handle_t		ldc_handle;	/* channel handle */
292 	kmutex_t		drain_cv_lock;
293 	kcondvar_t		drain_cv;	/* channel draining */
294 	int			drain_state;
295 	uint32_t		hphase;		/* handshake phase */
296 	int			hcnt;		/* # handshake attempts */
297 	kmutex_t		status_lock;
298 	ldc_status_t		ldc_status;	/* channel status */
299 	uint8_t			reset_active;	/* reset flag */
300 	uint64_t		local_session;	/* Our session id */
301 	uint64_t		peer_session;	/* Our peers session id */
302 	uint8_t			session_status;	/* Session recv'd, sent */
303 	uint32_t		hss_id;		/* Handshake session id */
304 	uint64_t		next_ident;	/* Next dring ident # to use */
305 	lane_t			lane_in;	/* Inbound lane */
306 	lane_t			lane_out;	/* Outbound lane */
307 	uint8_t			dev_class;	/* Peer device class */
308 	vio_multi_pool_t	vmp;		/* Receive mblk pools */
309 
310 	/* tx thread fields */
311 	kthread_t		*tx_thread;	/* tx thread */
312 	uint32_t		tx_thr_flags;	/* tx thread flags */
313 	kmutex_t		tx_thr_lock;	/* lock for tx thread */
314 	kcondvar_t		tx_thr_cv;	/* cond.var for tx thread */
315 	mblk_t			*tx_mhead;	/* tx mblks head */
316 	mblk_t			*tx_mtail;	/* tx mblks tail */
317 	uint64_t		tx_failures; 	/* tx failures */
318 
319 	/* receive thread fields */
320 	kthread_t		*rx_thread;	/* receive thread */
321 	uint32_t		rx_thr_flags;	/* receive thread flags */
322 	kmutex_t		rx_thr_lock;	/* lock for receive thread */
323 	kcondvar_t		rx_thr_cv;	/* cond.var for recv thread */
324 
325 	/* channel statistics */
326 	vgen_stats_t		ldc_stats;	/* channel statistics */
327 	kstat_t			*ksp;		/* channel kstats */
328 } vsw_ldc_t;
329 
/*
 * Worker thread flags.
 */
#define	VSW_WTHR_RUNNING	0x01	/* worker thread running */
#define	VSW_WTHR_DATARCVD	0x02	/* data received */
#define	VSW_WTHR_STOP		0x04	/* stop worker thread request */
334 
335 /* list of ldcs per port */
336 typedef struct vsw_ldc_list {
337 	vsw_ldc_t	*head;		/* head of the list */
338 	krwlock_t	lockrw;		/* sync access(rw) to the list */
339 	int		num_ldcs;	/* number of ldcs in the list */
340 } vsw_ldc_list_t;
341 
342 /* multicast addresses port is interested in */
343 typedef struct mcst_addr {
344 	struct mcst_addr	*nextp;
345 	struct ether_addr	mca;	/* multicast address */
346 	uint64_t		addr;	/* mcast addr converted to hash key */
347 	boolean_t		mac_added; /* added into physical device */
348 } mcst_addr_t;
349 
/*
 * Port detach states.
 */
#define	VSW_PORT_INIT		0x01	/* Initial non-detach state */
#define	VSW_PORT_DETACHING	0x02	/* In process of being detached */
#define	VSW_PORT_DETACHABLE	0x04	/* Safe to detach */

/*
 * Where (if anywhere) an address has been set.
 */
#define	VSW_ADDR_UNSET		0x00	/* Addr not set */
#define	VSW_ADDR_HW		0x01	/* Addr programmed in HW */
#define	VSW_ADDR_PROMISC	0x02	/* Card in promisc to see addr */
358 
359 /* port information associated with a vsw */
360 typedef struct vsw_port {
361 	int			p_instance;	/* port instance */
362 	struct vsw_port		*p_next;	/* next port in the list */
363 	struct vsw		*p_vswp;	/* associated vsw */
364 	vsw_ldc_list_t		p_ldclist;	/* list of ldcs for this port */
365 
366 	kmutex_t		tx_lock;	/* transmit lock */
367 	int			(*transmit)(vsw_ldc_t *, mblk_t *);
368 
369 	int			state;		/* port state */
370 	kmutex_t		state_lock;
371 	kcondvar_t		state_cv;
372 
373 	uint32_t		ref_cnt;	/* # of active references */
374 
375 	kmutex_t		mca_lock;	/* multicast lock */
376 	mcst_addr_t		*mcap;		/* list of multicast addrs */
377 
378 	mac_addr_slot_t		addr_slot;	/* Unicast address slot */
379 	int			addr_set;	/* Addr set where */
380 
381 	/*
382 	 * mac address of the port & connected device
383 	 */
384 	struct ether_addr	p_macaddr;
385 } vsw_port_t;
386 
387 /* list of ports per vsw */
388 typedef struct vsw_port_list {
389 	vsw_port_t	*head;		/* head of the list */
390 	krwlock_t	lockrw;		/* sync access(rw) to the list */
391 	int		num_ports;	/* number of ports in the list */
392 } vsw_port_list_t;
393 
394 /*
395  * Taskq control message
396  */
397 typedef struct vsw_ctrl_task {
398 	vsw_ldc_t	*ldcp;
399 	def_msg_t	pktp;
400 	uint32_t	hss_id;
401 } vsw_ctrl_task_t;
402 
/*
 * State of the connection to the peer. Some of these states
 * correspond to LDC events:
 *
 *	VSW_CONN_RESET	-> LDC_RESET_EVT
 *	VSW_CONN_UP	-> LDC_UP_EVT
 */
#define	VSW_CONN_UP		0x01	/* Connection came up */
#define	VSW_CONN_RESET		0x02	/* Connection reset */
#define	VSW_CONN_RESTART	0x04	/* Restarting handshake on connection */
413 
414 typedef struct vsw_conn_evt {
415 	uint16_t	evt;		/* Connection event */
416 	vsw_ldc_t	*ldcp;
417 } vsw_conn_evt_t;
418 
419 /*
420  * Ethernet broadcast address definition.
421  */
422 static	struct	ether_addr	etherbroadcastaddr = {
423 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
424 };
425 
/*
 * Destination address tests. 'ehp' is a pointer to a header structure
 * with an ether_dhost member.
 *
 * IS_BROADCAST: the destination equals etherbroadcastaddr.
 * IS_MULTICAST: the low-order bit of the first destination octet is set.
 *	The broadcast address also satisfies this test, so test
 *	IS_BROADCAST first when the two cases must be told apart.
 *
 * The argument is parenthesized so that expressions such as 'p + 1'
 * can be passed safely.
 */
#define	IS_BROADCAST(ehp) \
	(ether_cmp(&(ehp)->ether_dhost, &etherbroadcastaddr) == 0)
#define	IS_MULTICAST(ehp) \
	(((ehp)->ether_dhost.ether_addr_octet[0] & 01) == 1)
430 
/*
 * Shorthand for the rwlock calls; 'lockp' is passed straight through
 * to rw_enter()/rw_exit().
 */
#define	READ_ENTER(lockp)	rw_enter((lockp), RW_READER)
#define	WRITE_ENTER(lockp)	rw_enter((lockp), RW_WRITER)
#define	RW_EXIT(lockp)		rw_exit((lockp))
434 
/*
 * Take or drop a reference on a port by atomically incrementing or
 * decrementing its ref_cnt.
 */
#define	VSW_PORT_REFHOLD(p)	atomic_inc_32(&((p)->ref_cnt))
#define	VSW_PORT_REFRELE(p)	atomic_dec_32(&((p)->ref_cnt))
437 
438 #ifdef	__cplusplus
439 }
440 #endif
441 
442 #endif	/* _VSW_LDC_H */
443