xref: /titanic_50/usr/src/uts/sun4v/sys/vsw.h (revision b94bb0f0e78c11b6013e1a33c11fd73901947bfc)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * This header file contains the basic data structures which the
29  * virtual switch (vsw) uses to communicate with its clients and
30  * the outside world.
31  *
32  * The virtual switch reads the machine description (MD) to
33  * determine how many port_t structures to create (each port_t
34  * can support communications to a single network device). The
35  * port_t's are maintained in a linked list.
36  *
37  * Each port in turn contains a number of logical domain channels
38  * (ldc's) which are inter domain communications channels which
39  * are used for passing small messages between the domains. There
40  * may be an unlimited number of channels associated with each port,
41  * though most devices only use a single channel.
42  *
43  * The ldc is a bi-directional channel, which is divided up into
44  * two directional 'lanes', one outbound from the switch to the
45  * virtual network device, the other inbound to the switch.
46  * Depending on the type of device each lane may have separate
47  * communication parameters (such as mtu etc).
48  *
49  * For those network clients which use descriptor rings the
50  * rings are associated with the appropriate lane. I.e. rings
51  * which the switch exports are associated with the outbound lanes
52  * while those which the network clients are exporting to the switch
53  * are associated with the inbound lane.
54  *
55  * In diagram form the data structures look as follows:
56  *
57  * vsw instance
58  *     |
59  *     +----->port_t----->port_t----->port_t----->
60  *		|
61  *		+--->ldc_t--->ldc_t--->ldc_t--->
62  *		       |
63  *		       +--->lane_t (inbound)
64  *		       |       |
65  *		       |       +--->dring--->dring--->
66  *		       |
67  *		       +--->lane_t (outbound)
68  *			       |
69  *			       +--->dring--->dring--->
70  *
71  */
72 
73 #ifndef	_VSW_H
74 #define	_VSW_H
75 
76 #pragma ident	"%Z%%M%	%I%	%E% SMI"
77 
78 #ifdef	__cplusplus
79 extern "C" {
80 #endif
81 
82 #include <sys/vio_mailbox.h>
83 #include <sys/vnet_common.h>
84 #include <sys/ethernet.h>
85 
86 /*
87  * Default message type.
 *
 * A fixed-size container of eight 64-bit words (64 bytes in total).
 * It is embedded by value in vsw_ctrl_task_t (member pktp) later in
 * this file.
88  */
89 typedef struct def_msg {
90 	uint64_t	data[8];	/* message contents, 8 x 64-bit words */
91 } def_msg_t;
92 
93 /*
94  * Currently only support one major/minor pair.
 *
 * ver_sup_t holds one such pair, packed as two 16-bit bit-fields that
 * share a single uint32_t. lane_t carries the same two bit-fields.
95  */
96 #define	VSW_NUM_VER	1
97 
98 typedef struct ver_sup {
99 	uint32_t	ver_major:16,	/* Version major number */
100 			ver_minor:16;	/* Version minor number */
101 } ver_sup_t;
102 
103 /*
104  * Only support ETHER mtu at moment.
 *
 * NOTE(review): the value used here is ETHERMAX, whereas the mtu member
 * of lane_t in this file is commented "ETHERMTU". Confirm which of the
 * two constants is actually intended for the handshake.
105  */
106 #define	VSW_MTU		ETHERMAX
107 
108 /*
109  * Lane states.
 *
 * Apart from VSW_LANE_INACTIV (zero), every value below is a distinct
 * single bit, running from 0x1 up to 0x40000000.
 * NOTE(review): presumably these are OR'ed together in the lstate member
 * of lane_t to record handshake progress - confirm against the code
 * that updates lstate.
 *
 * Names follow the pattern VSW_<message>_<INFO|ACK|NACK>_<SENT|RECV>,
 * where <message> is VER, ATTR, DRING, RDX or MCST.
110  */
111 #define	VSW_LANE_INACTIV	0x0	/* No params set for lane */
112 
113 #define	VSW_VER_INFO_SENT	0x1	/* Version # sent to peer */
114 #define	VSW_VER_INFO_RECV	0x2	/* Version # recv from peer */
115 #define	VSW_VER_ACK_RECV	0x4
116 #define	VSW_VER_ACK_SENT	0x8
117 #define	VSW_VER_NACK_RECV	0x10
118 #define	VSW_VER_NACK_SENT	0x20
119 
120 #define	VSW_ATTR_INFO_SENT	0x40	/* Attributes sent to peer */
121 #define	VSW_ATTR_INFO_RECV	0x80	/* Peer attributes received */
122 #define	VSW_ATTR_ACK_SENT	0x100
123 #define	VSW_ATTR_ACK_RECV	0x200
124 #define	VSW_ATTR_NACK_SENT	0x400
125 #define	VSW_ATTR_NACK_RECV	0x800
126 
127 #define	VSW_DRING_INFO_SENT	0x1000	/* Dring info sent to peer */
128 #define	VSW_DRING_INFO_RECV	0x2000	/* Dring info received */
129 #define	VSW_DRING_ACK_SENT	0x4000
130 #define	VSW_DRING_ACK_RECV	0x8000
131 #define	VSW_DRING_NACK_SENT	0x10000
132 #define	VSW_DRING_NACK_RECV	0x20000
133 
134 #define	VSW_RDX_INFO_SENT	0x40000	/* RDX sent to peer */
135 #define	VSW_RDX_INFO_RECV	0x80000	/* RDX received from peer */
136 #define	VSW_RDX_ACK_SENT	0x100000
137 #define	VSW_RDX_ACK_RECV	0x200000
138 #define	VSW_RDX_NACK_SENT	0x400000
139 #define	VSW_RDX_NACK_RECV	0x800000
140 
141 #define	VSW_MCST_INFO_SENT	0x1000000
142 #define	VSW_MCST_INFO_RECV	0x2000000
143 #define	VSW_MCST_ACK_SENT	0x4000000
144 #define	VSW_MCST_ACK_RECV	0x8000000
145 #define	VSW_MCST_NACK_SENT	0x10000000
146 #define	VSW_MCST_NACK_RECV	0x20000000
147 
148 #define	VSW_LANE_ACTIVE		0x40000000	/* Lane open to xmit data */
149 
150 /* Handshake milestones */
/*
 * Each milestone is a distinct single bit.
 * NOTE(review): presumably the current milestone is kept in the hphase
 * ("handshake phase") member of vsw_ldc_t - confirm against its users.
 */
151 #define	VSW_MILESTONE0		0x1	/* ver info exchanged */
152 #define	VSW_MILESTONE1		0x2	/* attribute exchanged */
153 #define	VSW_MILESTONE2		0x4	/* dring info exchanged */
154 #define	VSW_MILESTONE3		0x8	/* rdx exchanged */
155 #define	VSW_MILESTONE4		0x10	/* handshake complete */
156 
157 /*
158  * Lane direction (relative to ourselves).
 *
 * As described at the top of this file, the outbound lane runs from the
 * switch to the virtual network device and the inbound lane runs to the
 * switch; vsw_ldc_t holds one lane_t for each (lane_in / lane_out).
159  */
160 #define	INBOUND			0x1
161 #define	OUTBOUND		0x2
162 
163 /* Peer session id received */
/*
 * NOTE(review): presumably recorded in the session_status member of
 * vsw_ldc_t - confirm.
 */
164 #define	VSW_PEER_SESSION	0x1
165 
166 /*
167  * Maximum number of consecutive reads of data from channel
168  */
169 #define	VSW_MAX_CHAN_READ	50
170 
171 /*
172  * LDC queue length
 *
 * NOTE(review): the unit (entries vs. bytes) is not visible from this
 * header - confirm against the ldc initialisation code.
173  */
174 #define	VSW_LDC_QLEN		1024
175 
176 /*
177  * Currently only support one ldc per port.
 *
 * The data structures themselves (vsw_ldc_list_t) are a linked list and
 * do not impose this limit.
178  */
179 #define	VSW_PORT_MAX_LDCS	1	/* max # of ldcs per port */
180 
181 /*
182  * Used for port add/deletion.
183  */
184 #define	VSW_PORT_UPDATED	0x1
185 
/*
 * Transmit result codes.
 * NOTE(review): presumably the values returned through the transmit
 * function pointer in vsw_port_t - confirm against the implementations.
 */
186 #define	LDC_TX_SUCCESS		0	/* ldc transmit success */
187 #define	LDC_TX_FAILURE		1	/* ldc transmit failure */
188 #define	LDC_TX_NORESOURCES	2	/* out of descriptors */
189 
190 /* ID of the source of a frame being switched */
/* The three values are distinct single bits (1, 2, 4). */
191 #define	VSW_PHYSDEV		1	/* physical device associated */
192 #define	VSW_VNETPORT		2	/* port connected to vnet (over ldc) */
193 #define	VSW_LOCALDEV		4	/* vsw configured as an eth interface */
194 
195 /*
196  * Descriptor ring info
197  *
198  * Each descriptor element has a pre-allocated data buffer
199  * associated with it, into which data being transmitted is
200  * copied. By pre-allocating we speed up the copying process.
201  * The buffer is re-used once the peer has indicated that it is
202  * finished with the descriptor.
 *
 * NOTE(review): VSW_PRIV_SIZE is the size of vnet_private_desc_t, not
 * of the vsw_private_desc_t declared just below in this file - confirm
 * that this is the intended type.
203  */
204 #define	VSW_RING_NUM_EL		512	/* Num of entries in ring */
205 #define	VSW_RING_EL_DATA_SZ	2048	/* Size of data section (bytes) */
206 #define	VSW_PRIV_SIZE	sizeof (vnet_private_desc_t)
207 #define	VSW_PUB_SIZE	sizeof (vnet_public_desc_t)
208 
/*
 * Number of whole MMU pages in ETHERMTU bytes, plus two. Used as the
 * dimension of the memcookie[] array in vsw_private_desc_t.
 */
209 #define	VSW_MAX_COOKIES		((ETHERMTU >> MMU_PAGESHIFT) + 2)
210 
211 /*
212  * Private descriptor
 *
 * Switch-local bookkeeping for one ring element. It points at the
 * matching public descriptor (descp) and carries the memory handle and
 * cookies for the element's data buffer.
 * NOTE(review): the per-field notes marked "presumably" are inferred
 * from names and types only - confirm against the ring setup code.
213  */
214 typedef struct vsw_private_desc {
215 	uint64_t		dstate;		/* descriptor state */
216 	vnet_public_desc_t	*descp;		/* associated public descriptor */
217 	ldc_mem_handle_t	memhandle;	/* ldc memory handle */
218 	void			*datap;		/* presumably the data buffer */
219 	uint64_t		datalen;	/* presumably length of datap */
220 	uint64_t		ncookies;	/* presumably # used memcookie[] */
221 	ldc_mem_cookie_t	memcookie[VSW_MAX_COOKIES];
222 	int			bound;		/* presumably set once bound */
223 } vsw_private_desc_t;
224 
225 /*
226  * Descriptor ring structure
 *
 * Rings are chained through the next member; the head of a lane's chain
 * is the dringp member of lane_t.
 * NOTE(review): cookie[] has exactly one element while ncookies is a
 * count - confirm that a ring never needs more than one cookie.
227  */
228 typedef struct dring_info {
229 	struct	dring_info	*next;	/* next ring in chain */
230 	kmutex_t		dlock;	/* NOTE(review): presumably guards this ring */
231 	uint32_t		num_descriptors;	/* # of descriptors in ring */
232 	uint32_t		descriptor_size;	/* size of one descriptor */
233 	uint32_t		options;
234 	uint32_t		ncookies;	/* # of cookies */
235 	ldc_mem_cookie_t	cookie[1];
236 
237 	ldc_dring_handle_t	handle;	/* ldc descriptor ring handle */
238 	uint64_t		ident;	/* identifier sent to peer */
239 	uint64_t		end_idx;	/* last idx processed */
240 
241 	/*
242 	 * base address of private and public portions of the
243 	 * ring (where appropriate), and data block.
244 	 */
245 	void			*pub_addr;	/* base of public section */
246 	void			*priv_addr;	/* base of private section */
247 	void			*data_addr;	/* base of data section */
248 	size_t			data_sz;	/* size of data section */
249 } dring_info_t;
250 
251 /*
252  * Each ldc connection is comprised of two lanes, incoming
253  * from a peer, and outgoing to that peer. Each lane shares
254  * common ldc parameters and also has private lane-specific
255  * parameters.
 *
 * vsw_ldc_t embeds two of these by value (lane_in and lane_out).
 * The version bit-fields have the same layout as ver_sup_t.
 * NOTE(review): lstate presumably holds the VSW_*_SENT / VSW_*_RECV /
 * VSW_LANE_ACTIVE flags defined above - confirm. The mtu comment says
 * ETHERMTU while VSW_MTU is defined as ETHERMAX - confirm which applies.
256  */
257 typedef struct lane {
258 	uint64_t	lstate;		/* Lane state */
259 	uint32_t	ver_major:16,	/* Version major number */
260 			ver_minor:16;	/* Version minor number */
261 	uint64_t	seq_num;	/* Sequence number */
262 	uint64_t	mtu;		/* ETHERMTU */
263 	uint64_t	addr;		/* Unique physical address */
264 	uint8_t		addr_type;	/* Only MAC address at moment */
265 	uint8_t		xfer_mode;	/* Dring or Pkt based */
266 	uint8_t		ack_freq;	/* Only non zero for Pkt based xfer */
267 	dring_info_t	*dringp;	/* List of drings for this lane */
268 } lane_t;
269 
270 /* channel drain states */
271 #define	VSW_LDC_INIT		0x1	/* Initial non-drain state */
272 #define	VSW_LDC_DRAINING	0x2	/* Channel draining */
273 
274 /* ldc information associated with a vsw-port */
/*
 * Channels belonging to one port are chained through ldc_next and
 * anchored in the port's vsw_ldc_list_t. Each channel points back at
 * its owning port and vsw instance and embeds its two lanes by value.
 * NOTE(review): the lock/field associations marked "presumably" below
 * are inferred from naming only - confirm against the implementation.
 */
275 typedef struct vsw_ldc {
276 	struct vsw_ldc		*ldc_next;	/* next ldc in the list */
277 	struct vsw_port		*ldc_port;	/* associated port */
278 	struct vsw		*ldc_vswp;	/* associated vsw */
279 	kmutex_t		ldc_cblock;	/* sync callback processing */
280 	kmutex_t		ldc_txlock;	/* sync transmits */
281 	uint64_t		ldc_id;		/* channel number */
282 	ldc_handle_t		ldc_handle;	/* channel handle */
283 	kmutex_t		drain_cv_lock;	/* presumably guards drain_cv */
284 	kcondvar_t		drain_cv;	/* channel draining */
285 	int			drain_state;	/* presumably VSW_LDC_INIT/DRAINING */
286 	uint32_t		hphase;		/* handshake phase */
287 	int			hcnt;		/* # handshake attempts */
288 	ldc_status_t		ldc_status;	/* channel status */
289 	uint64_t		local_session;	/* Our session id */
290 	uint64_t		peer_session;	/* Our peers session id */
291 	uint8_t			session_status;	/* Session recv'd, sent */
292 	kmutex_t		hss_lock;	/* presumably guards hss_id */
293 	uint32_t		hss_id;		/* Handshake session id */
294 	uint64_t		next_ident;	/* Next dring ident # to use */
295 	lane_t			lane_in;	/* Inbound lane */
296 	lane_t			lane_out;	/* Outbound lane */
297 	uint8_t			dev_class;	/* Peer device class */
298 } vsw_ldc_t;
299 
300 /* list of ldcs per port */
/*
 * Anchor for a port's singly linked chain of vsw_ldc_t (linked through
 * ldc_next). Embedded by value in vsw_port_t as p_ldclist.
 */
301 typedef struct vsw_ldc_list {
302 	vsw_ldc_t	*head;		/* head of the list */
303 	krwlock_t	lockrw;		/* sync access(rw) to the list */
304 	int		num_ldcs;	/* number of ldcs in the list */
305 } vsw_ldc_list_t;
306 
307 /* multicast addresses port is interested in */
/*
 * Node of a singly linked list. Both vsw_port_t and vsw_t keep such a
 * list in their mcap member, alongside an mca_lock mutex.
 */
308 typedef struct mcst_addr {
309 	struct mcst_addr	*nextp;	/* next address in the list */
310 	uint64_t		addr;	/* multicast address, as a 64-bit value */
311 } mcst_addr_t;
312 
313 /* Port detach states */
314 #define	VSW_PORT_INIT		0x1	/* Initial non-detach state */
315 #define	VSW_PORT_DETACHING	0x2	/* In process of being detached */
316 #define	VSW_PORT_DETACHABLE	0x4	/* Safe to detach */
317 
318 /* port information associated with a vsw */
/*
 * Ports of one vsw instance are chained through p_next and anchored in
 * the instance's vsw_port_list_t. Each port owns a list of channels
 * (p_ldclist) and a list of multicast addresses (mcap).
 * NOTE(review): state presumably holds one of the VSW_PORT_* detach
 * states above; the pairing of each *_lock with the neighbouring *_cv
 * and field is inferred from naming - confirm against the code.
 */
319 typedef struct vsw_port {
320 	int			p_instance;	/* port instance */
321 	struct vsw_port		*p_next;	/* next port in the list */
322 	struct vsw		*p_vswp;	/* associated vsw */
323 	vsw_ldc_list_t		p_ldclist;	/* list of ldcs for this port */
324 
325 	kmutex_t		tx_lock;	/* transmit lock */
	/* transmit routine: takes a channel and a message block */
326 	int			(*transmit)(vsw_ldc_t *, mblk_t *);
327 
328 	int			state;		/* port state */
329 	kmutex_t		state_lock;
330 	kcondvar_t		state_cv;
331 
332 	int			ref_cnt;	/* # of active references */
333 	kmutex_t		ref_lock;
334 	kcondvar_t		ref_cv;
335 
336 	kmutex_t		mca_lock;	/* multicast lock */
337 	mcst_addr_t		*mcap;		/* list of multicast addrs */
338 
339 	/*
340 	 * mac address of the port & connected device
341 	 */
342 	struct ether_addr	p_macaddr;
343 } vsw_port_t;
344 
345 /* list of ports per vsw */
/*
 * Anchor for an instance's singly linked chain of vsw_port_t (linked
 * through p_next). Embedded by value in vsw_t as plist.
 */
346 typedef struct vsw_port_list {
347 	vsw_port_t	*head;		/* head of the list */
348 	krwlock_t	lockrw;		/* sync access(rw) to the list */
349 	int		num_ports;	/* number of ports in the list */
350 } vsw_port_list_t;
351 
352 /*
353  * Taskq control message
 *
 * Bundles a channel pointer, a by-value copy of a message (def_msg_t,
 * 64 bytes) and a handshake session id.
 * NOTE(review): presumably dispatched on the taskq_p taskq of vsw_t
 * ("VIO ctrl msg taskq"), with hss_id compared against the channel's
 * current hss_id when the task runs - confirm.
354  */
355 typedef struct vsw_ctrl_task {
356 	vsw_ldc_t	*ldcp;		/* channel the message belongs to */
357 	def_msg_t	pktp;		/* copy of the message (not a pointer) */
358 	uint32_t	hss_id;		/* handshake session id */
359 } vsw_ctrl_task_t;
360 
361 /*
362  * Number of hash chains in the multicast forwarding database.
 *
 * The database itself is the mfdb member (a mod_hash_t pointer) of vsw_t.
363  */
364 #define		VSW_NCHAINS	8
365 
/*
 * State of interface if switch plumbed as network device.
 *
 * VSW_IF_UP and VSW_IF_PROMISC are independent bit flags; the if_state
 * member of vsw_t is the field described as "interface state".
 */
#define		VSW_IF_UP	0x1	/* Interface UP */
#define		VSW_IF_PROMISC	0x2	/* Interface in promiscuous mode */

/*
 * Evaluates to non-zero only when `state' is exactly
 * (VSW_IF_UP | VSW_IF_PROMISC), i.e. both flags set and no other bits.
 *
 * The argument is parenthesized because `==' binds tighter than `|':
 * without the parentheses VSW_U_P(a | b) would parse as a | (b == ...).
 */
#define		VSW_U_P(state)	\
			((state) == (VSW_IF_UP | VSW_IF_PROMISC))
374 
375 /*
376  * Switching modes.
 *
 * Three distinct single-bit values; NUM_SMODES is their count and is
 * the dimension of the smode[] array in vsw_t.
377  */
378 #define		VSW_LAYER2		0x1	/* Layer 2 - MAC switching */
379 #define		VSW_LAYER2_PROMISC	0x2	/* Layer 2 + promisc mode */
380 #define		VSW_LAYER3		0x4	/* Layer 3 - IP switching */
381 
382 #define		NUM_SMODES	3	/* number of switching modes */
383 
384 /*
385  * Bits indicating which properties we've read from MD.
 *
 * NOTE(review): presumably stored in the mdprops member of vsw_t
 * ("bitmask of props found") - confirm.
386  */
387 #define		VSW_MD_PHYSNAME	0x1
388 #define		VSW_MD_MACADDR	0x2
389 #define		VSW_MD_SMODE	0x4
390 
391 /*
392  * vsw instance state information.
 *
 * Root of the structure diagram at the top of this file: an instance
 * owns a list of ports (plist), each of which owns a list of channels.
 * Instances are themselves chained through the next member.
 * NOTE(review): notes marked "presumably" are inferred from field names
 * and types only - confirm against the implementation.
393  */
394 typedef struct	vsw {
395 	int			instance;	/* instance # */
396 	dev_info_t		*dip;		/* associated dev_info */
397 	struct vsw		*next;		/* next in list */
398 	char			physname[LIFNAMSIZ];	/* phys-dev */
399 	uint8_t			smode[NUM_SMODES];	/* switching mode */
400 	int			smode_idx;	/* curr pos in smode array */
401 	uint8_t			mdprops;	/* bitmask of props found */
402 	vsw_port_list_t		plist;		/* associated ports */
403 	ddi_taskq_t		*taskq_p;	/* VIO ctrl msg taskq */
404 	mod_hash_t		*fdb;		/* forwarding database */
405 
406 	mod_hash_t		*mfdb;		/* multicast FDB */
407 	krwlock_t		mfdbrw;		/* rwlock for mFDB */
408 
409 	/* mac layer */
410 	mac_handle_t		mh;		/* MAC handle */
411 	mac_rx_handle_t		mrh;		/* MAC receive handle */
412 	mac_notify_handle_t	mnh;		/* MAC notify handle */
413 	const mac_txinfo_t	*txinfo;	/* MAC tx routine */
414 
415 	/* Initial promisc setting of interface */
416 	boolean_t		init_promisc;
417 
418 	/* Machine Description updates  */
419 	mdeg_node_spec_t	*inst_spec;
420 	mdeg_handle_t		mdeg_hdl;
421 
422 	/* if configured as an ethernet interface */
423 	mac_t			*if_macp;	/* MAC structure */
424 	mac_resource_handle_t	if_mrh;		/* MAC resource handle */
425 	struct ether_addr	if_addr;	/* interface address */
426 	krwlock_t		if_lockrw;	/* presumably guards if_* state */
427 	uint8_t			if_state;	/* interface state */
428 
429 	/* multicast addresses when configured as eth interface */
430 	kmutex_t		mca_lock;	/* multicast lock */
431 	mcst_addr_t		*mcap;		/* list of multicast addrs */
432 } vsw_t;
433 
434 
435 /*
436  * Ethernet broadcast address definition.
 *
 * All six octets are 0xff. Referenced by the IS_BROADCAST() macro.
 *
 * NOTE(review): this is a static object *defined* in a header, so every
 * source file that includes this header gets its own private copy (and
 * may see an unused-variable warning if it never uses IS_BROADCAST).
 * It is also not const; consider a single extern definition if the
 * ether_cmp() prototype allows it.
437  */
438 static	struct	ether_addr	etherbroadcastaddr = {
439 	0xff, 0xff, 0xff, 0xff, 0xff, 0xff
440 };
441 
/*
 * Destination-address tests on an Ethernet header.
 *
 * ehp is a pointer to a header whose ether_dhost member is a
 * struct ether_addr.
 *
 * IS_BROADCAST: non-zero when ether_cmp() reports the destination as
 *	equal to etherbroadcastaddr.
 * IS_MULTICAST: non-zero when the low-order bit of the first destination
 *	octet is set. The broadcast address (first octet 0xff) also
 *	satisfies this test, so check IS_BROADCAST first when the two
 *	cases must be told apart.
 *
 * The ehp argument is parenthesized so that expression arguments such
 * as (p + 1) or &hdr expand correctly; `->' binds tighter than `+' and
 * unary `&'.
 */
#define	IS_BROADCAST(ehp) \
	(ether_cmp(&(ehp)->ether_dhost, &etherbroadcastaddr) == 0)
#define	IS_MULTICAST(ehp) \
	(((ehp)->ether_dhost.ether_addr_octet[0] & 01) == 1)
446 
/*
 * Shorthand for the kernel rwlock calls: x is passed unchanged as the
 * first argument of rw_enter() (with RW_READER or RW_WRITER) and as the
 * only argument of rw_exit().
 */
447 #define	READ_ENTER(x)	rw_enter(x, RW_READER)
448 #define	WRITE_ENTER(x)	rw_enter(x, RW_WRITER)
449 #define	RW_EXIT(x)	rw_exit(x)
450 
451 #ifdef	__cplusplus
452 }
453 #endif
454 
455 #endif	/* _VSW_H */
456