xref: /titanic_51/usr/src/uts/sun4v/sys/ldc_impl.h (revision 52782930452a219339a03491668e436bcef2efd9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #ifndef _LDC_IMPL_H
28 #define	_LDC_IMPL_H
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #ifdef __cplusplus
33 extern "C" {
34 #endif
35 
36 #include <sys/types.h>
37 #include <sys/ddi.h>
38 #include <sys/sunddi.h>
39 #include <sys/ioctl.h>
40 
41 /* Number of entries in an LDC memory map table */
42 #define	LDC_MTBL_ENTRIES	8192	/* 8 K */
43 
44 /* LDC queue geometry; a queue holds fixed-size 64-byte packets */
45 #define	LDC_PACKET_SHIFT	6	/* log2 of the packet size */
46 #define	LDC_QUEUE_ENTRIES	512	/* packets per queue */
47 #define	LDC_MTU_MSGS		4	/* default MTU is 1/4 of a queue */
48 #define	LDC_QUEUE_SIZE		(LDC_QUEUE_ENTRIES << LDC_PACKET_SHIFT)
49 #define	LDC_DEFAULT_MTU		(LDC_QUEUE_SIZE / LDC_MTU_MSGS)
50 
51 /*
52  * LDC Reliable mode - initial packet seqid
53  * - If the peer initiated the handshake, RDX should contain init_seqid + 1
54  * - If this endpoint initiated the handshake, the first data packet
55  *   should contain init_seqid + 1
56  */
57 #define	LDC_INIT_SEQID	0x0
58 
59 /* LDC Message types (type field of ldc_msg_t) */
60 #define	LDC_CTRL	0x01	/* Control Pkt */
61 #define	LDC_DATA	0x02	/* Data Pkt */
62 #define	LDC_ERR		0x10	/* Error Pkt */
63 
64 /* LDC Message Subtypes (stype field of ldc_msg_t) */
65 #define	LDC_INFO	0x01	/* Control/Data/Error info pkt */
66 #define	LDC_ACK		0x02	/* Control/Data ACK */
67 #define	LDC_NACK	0x04	/* Control/Data NACK */
68 
69 /* LDC Control Messages (ctrl field of ldc_msg_t) */
70 #define	LDC_VER		0x01	/* Version message */
71 #define	LDC_RTS		0x02	/* Request to Send */
72 #define	LDC_RTR		0x03	/* Ready To Receive */
73 #define	LDC_RDX		0x04	/* Ready for data exchange */
74 
75 #define	LDC_CTRL_MASK	0x0f	/* Mask to read control bits */
76 
77 /* LDC Channel Transport State (tstate); composite values OR the bits */
78 #define	TS_TXQ_RDY	0x01	/* allocated TX queue */
79 #define	TS_RXQ_RDY	0x02	/* allocated RX queue */
80 #define	TS_INIT		(TS_TXQ_RDY | TS_RXQ_RDY)	/* 0x03 */
81 #define	TS_QCONF_RDY	0x04	/* registered queues with HV */
82 #define	TS_CNEX_RDY	0x08	/* registered channel with cnex */
83 #define	TS_OPEN		(TS_INIT | TS_QCONF_RDY | TS_CNEX_RDY)	/* 0x0f */
84 #define	TS_LINK_READY	0x10	/* both endpts registered Rx queues */
85 #define	TS_READY	(TS_OPEN | TS_LINK_READY)	/* 0x1f */
86 #define	TS_VER_DONE	0x20	/* negotiated version */
87 #define	TS_VREADY	(TS_READY | TS_VER_DONE)	/* 0x3f */
88 #define	TS_HSHAKE_DONE	0x40	/* completed handshake */
89 #define	TS_UP		(TS_READY | TS_VER_DONE | TS_HSHAKE_DONE) /* 0x7f */
90 
91 /* LDC Channel Transport Handshake states (hstate) */
92 #define	TS_SENT_VER	0x01	/* Sent version */
93 #define	TS_SENT_RTS	0x02	/* Sent RTS */
94 #define	TS_RCVD_RTR	0x04	/* Received RTR */
95 #define	TS_SENT_RDX	0x08	/* Sent RDX */
96 #define	TS_RCVD_VER	0x10	/* Received version */
97 #define	TS_RCVD_RTS	0x20	/* Received RTS */
98 #define	TS_SENT_RTR	0x40	/* Sent RTR */
99 #define	TS_RCVD_RDX	0x80	/* Received RDX */
100 
101 /* LDC MSG Envelope (env field): length bits and fragment bits */
102 #define	LDC_LEN_MASK	0x3F	/* env<5:0> */
103 #define	LDC_FRAG_MASK	0xC0	/* env<7:6> */
104 
105 #define	LDC_FRAG_START	0x40	/* frag_info = 0x01 */
106 #define	LDC_FRAG_STOP	0x80	/* frag_info = 0x02 */
107 #define	LDC_FRAG_CONT	0x00	/* frag_info = 0x00 */
108 
109 /*
110  * LDC will retry LDC_MAX_RETRIES times when sending or
111  * receiving data or if the HV returns EWOULDBLOCK.
112  * Between each retry it will wait LDC_DELAY usecs.
113  */
114 #define	LDC_MAX_RETRIES	1000
115 #define	LDC_DELAY	1
116 
117 /*
118  * LDC Version information (major.minor pair)
119  */
120 #define	LDC_PAYLOAD_VER_OFF	8	/* offset of version in payload */
121 
122 typedef struct ldc_ver {
123 	uint16_t	major;		/* major version number */
124 	uint16_t	minor;		/* minor version number */
125 } ldc_ver_t;
126 
127 /*
128  * Each guest consists of one or more LDC endpoints represented by a ldc_chan
129  * structure. Each ldc_chan structure points to a ldc_mtbl structure that
130  * contains information about the map table associated with this LDC endpoint.
131  * The map table contains the list of pages being shared by this guest over
132  * this endpoint with the guest at the other end of this endpoint. Each LDC
133  * endpoint also points to a list of memory handles used to bind and export
134  * memory segments from this guest. If a memory segment is bound, it points to
135  * a memory segment structure, which in turn consists of an array of ldc_page
136  * structures for all the pages within that segment. Each ldc_page structure
137  * contains information about the shared page and also points to the
138  * corresponding entry in the map table.
139  *
140  * Each LDC endpoint also points to a list of ldc_dring structures that refer
141  * to both imported and exported descriptor rings. If it is an exported
142  * descriptor ring, it then points to memory handle/memseg corresponding to
143  * the region of memory associated with the descriptor ring.
144  *
145  *     +----------+   +----------+   +----------+
146  *     | ldc_chan |-->| ldc_chan |-->| ldc_chan |-->....
147  *     +----------+   +----------+   +----------+
148  *       |  |  |
149  *       |  |  |
150  *       |  |  |      +-----------+     +-----------+
151  *       |  |  +----->| ldc_dring |---->| ldc_dring |---->......
152  *       |  |         +-----------+     +-----------+
153  *       |  |               |
154  *       |  |               +----------------------------+
155  *       |  |                                            |
156  *       |  |                                            v
157  *       |  |      +----------+     +----------+     +----------+
158  *       |  +----->| ldc_mhdl |---->| ldc_mhdl |---->| ldc_mhdl |---> ....
159  *       |         +----------+     +----------+     +----------+
160  *       v                 |                             |
161  *  +----------+           |    +------------+           |    +------------+
162  *  | ldc_mtbl |--+        +--->| ldc_memseg |-----+     +--->| ldc_memseg |
163  *  +----------+  |             +------------+     |          +------------+
164  *                |                   |            |            |       |
165  *                v                   v            v            |       v
166  *     +--------------+         +----------+  +--------+        |   +--------+
167  *     | ldc_mte_slot |<--------| ldc_page |  | cookie |        |   | cookie |
168  *     +--------------+         +----------+  +--------+        |   +--------+
169  *     | ldc_mte_slot |<--------| ldc_page |  | cookie |        v
170  *     +--------------+         +----------+  +--------+   +----------+
171  *     | ldc_mte_slot |<-----------------------------------| ldc_page |
172  *     +--------------+                                    +----------+
173  *     | ldc_mte_slot |
174  *     +--------------+
175  *     |    ......    |/ +------------+
176  *     +--------------+  |   entry    |
177  *     | ldc_mte_slot |  +------------+
178  *     +--------------+  | inv_cookie |
179  *                     \ +------------+
180  *
181  */
182 
183 /*
184  * Message format of each packet sent over the LDC channel.
185  * Each packet is 64-bytes long.
186  *
187  * Each packet that is sent over LDC can contain either data or acks.
188  * The type will reflect the contents. The len will contain in bytes
189  * the amount of data being sent. In the case of ACKs, the seqid and
190  * data fields will contain the SEQIDs of messages for which ACKs are
191  * being sent.
192  *
193  * Raw pkt format:
194  *
195  *          +------------------------------------------------------+
196  *  0 - 7   |                 data payload                         |
197  *          +------------------------------------------------------+
198  *
199  * Unreliable pkt format:
200  *
201  *          +------------------------------------------------------+
202  *      0   |          seqid          | env  | ctrl | stype | type |
203  *          +------------------------------------------------------+
204  *  1 - 7   |                 data payload                         |
205  *          +------------------------------------------------------+
206  *
207  * Reliable pkt format:
208  *
209  *          +------------------------------------------------------+
210  *      0   |            seqid        | env  | ctrl | stype | type |
211  *          +------------------------------------------------------+
212  *      1   |          ackid          |         unused             |
213  *          +------------------------------------------------------+
214  *  2 - 7   |                 data payload                         |
215  *          +------------------------------------------------------+
216  */
217 
218 typedef struct ldc_msg {
219 	union {
220 		struct {
221 			uint8_t		_type;	/* Message type */
222 			uint8_t		_stype;	/* Message subtype */
223 			uint8_t		_ctrl;	/* Control/Error Message */
224 			uint8_t 	_env;	/* Message Envelope */
225 			uint32_t	_seqid;	/* Sequence ID */
226 			/* Payload: unreliable layout or reliable layout */
227 			union {
228 				uint8_t	_ud[LDC_PAYLOAD_SIZE_UNRELIABLE];
229 						/* Unreliable data payload */
230 				struct {
231 					uint32_t _unused;	/* unused */
232 					uint32_t _ackid;	/* ACK ID */
233 					uint8_t	_rd[LDC_PAYLOAD_SIZE_RELIABLE];
234 						/* Reliable data payload */
235 				} _rl;
236 			} _data;
237 		} _tpkt;
238 		/* Raw packet view: no header fields, payload only */
239 		uint8_t		_raw[LDC_PAYLOAD_SIZE_RAW];
240 	} _pkt;
241 
242 } ldc_msg_t;
243 /* Member shorthands; as plain macros they rewrite ANY use of these names */
244 #define	raw		_pkt._raw
245 #define	type		_pkt._tpkt._type
246 #define	stype		_pkt._tpkt._stype
247 #define	ctrl		_pkt._tpkt._ctrl
248 #define	env		_pkt._tpkt._env
249 #define	seqid		_pkt._tpkt._seqid
250 #define	udata		_pkt._tpkt._data._ud
251 #define	ackid		_pkt._tpkt._data._rl._ackid
252 #define	rdata		_pkt._tpkt._data._rl._rd
253 
254 /*
255  * LDC Map Table Entry (MTE)
256  * NOTE(review): diagram shows rsvd as <63:60>; the bit-fields say <63:56>.
257  *   6    6                               1    1  1
258  *  |3    0|                       psz|   3|   1| 0| 9| 8| 7|6|5|4|      0|
259  *  +------+--------------------------+----+----+--+--+--+--+-+-+-+-------+
260  *  | rsvd |           PFN            | 0  | 0  |CW|CR|IW|IR|X|W|R| pgszc |
261  *  +------+--------------------------+----+----+--+--+--+--+-+-+-+-------+
262  *  |                       hv invalidation cookie                        |
263  *  +---------------------------------------------------------------------+
264  */
265 typedef union {
266 	struct {
267 		uint64_t	_rsvd2:8,	/* <63:56> reserved */
268 				rpfn:43,	/* <55:13> real pfn */
269 				_rsvd1:2,	/* <12:11> reserved */
270 				cw:1,		/* <10> copy write perm */
271 				cr:1,		/* <9> copy read perm */
272 				iw:1,		/* <8> iommu write perm */
273 				ir:1,		/* <7> iommu read perm */
274 				x:1,		/* <6> execute perm */
275 				w:1,		/* <5> write perm */
276 				r:1,		/* <4> read perm */
277 				pgszc:4;	/* <3:0> pgsz code */
278 	} mte_bit;
279 
280 	uint64_t 		ll;	/* whole entry as one 64-bit word */
281 
282 } ldc_mte_t;
283 /* Shorthands for the ldc_mte_t bit-fields */
284 #define	mte_rpfn	mte_bit.rpfn
285 #define	mte_cw		mte_bit.cw
286 #define	mte_cr		mte_bit.cr
287 #define	mte_iw		mte_bit.iw
288 #define	mte_ir		mte_bit.ir
289 #define	mte_x		mte_bit.x
290 #define	mte_w		mte_bit.w
291 #define	mte_r		mte_bit.r
292 #define	mte_pgszc	mte_bit.pgszc
293 /* sz is a page size code; each step is 3 more shift bits (size x 8) */
294 #define	MTE_BSZS_SHIFT(sz)	((sz) * 3)
295 #define	MTEBYTES(sz)    	(MMU_PAGESIZE << MTE_BSZS_SHIFT(sz))
296 #define	MTEPAGES(sz)    	(1 << MTE_BSZS_SHIFT(sz))
297 #define	MTE_PAGE_SHIFT(sz)	(MMU_PAGESHIFT + MTE_BSZS_SHIFT(sz))
298 #define	MTE_PAGE_OFFSET(sz)	(MTEBYTES(sz) - 1)
299 #define	MTE_PAGEMASK(sz)	(~MTE_PAGE_OFFSET(sz))
300 #define	MTE_PFNMASK(sz)		(~(MTE_PAGE_OFFSET(sz) >> MMU_PAGESHIFT))
301 
302 /*
303  * LDC Map Table Slot: one map table entry plus its cookie word
304  */
305 typedef struct ldc_mte_slot {
306 	ldc_mte_t	entry;		/* Map table entry */
307 	uint64_t	cookie;		/* HV invalidation cookie */
308 } ldc_mte_slot_t;
309 
310 /*
311  * LDC Memory Map Table
312  *
313  * Each LDC has a memory map table it uses to list all the pages
314  * it is exporting to its peer over the channel. This structure
315  * contains information about the map table and is pointed to
316  * by the ldc_chan structure.
317  */
318 typedef struct ldc_mtbl {
319 	kmutex_t		lock;		/* Table lock */
320 	size_t			size;		/* Table size (in bytes) */
321 	uint64_t		next_entry;	/* Next entry to use */
322 	uint64_t		num_entries;	/* Num entries in table */
323 	uint64_t		num_avail;	/* Num of available entries */
324 	ldc_mte_slot_t		*table;		/* The table itself */
325 } ldc_mtbl_t;
326 
327 /*
328  * LDC page and memory segment information
329  */
330 typedef struct ldc_page {
331 	uintptr_t		raddr;		/* Exported page RA */
332 	uint64_t		offset;		/* Exported page offset */
333 	size_t			size;		/* Exported page size */
334 	uint64_t		index;		/* Index in map table */
335 	ldc_mte_slot_t		*mte;		/* Map table entry */
336 } ldc_page_t;
337 
338 typedef struct ldc_memseg {
339 	caddr_t			vaddr;		/* Exported segment VA */
340 	uintptr_t		raddr;		/* Exported segment RA */
341 	size_t			size;		/* Exported segment size */
342 	uint64_t		npages;		/* Number of pages */
343 	ldc_page_t		*pages;		/* Array of exported pages */
344 	uint32_t		ncookies;	/* Number of cookies */
345 	ldc_mem_cookie_t	*cookies;	/* Cookies for the segment */
346 	uint64_t		next_cookie;	/* Index to next cookie */
347 } ldc_memseg_t;
348 
349 /*
350  * LDC Cookie address format
351  *
352  *   6       6          m+n
353  *  |3|      0|          |                  m|                  0|
354  *  +-+-------+----------+-------------------+-------------------+
355  *  |X| pgszc |   rsvd   |      table_idx    |     page_offset   |
356  *  +-+-------+----------+-------------------+-------------------+
357  */
358 #define	LDC_COOKIE_PGSZC_MASK	0x7	/* pgszc is 3 bits wide */
359 #define	LDC_COOKIE_PGSZC_SHIFT	60	/* pgszc starts at bit 60 */
360 
361 /*
362  * LDC Memory handle (ldc_chan_t is forward-declared for the ldcp field)
363  */
364 typedef struct ldc_chan ldc_chan_t;
365 
366 typedef struct ldc_mhdl {
367 	kmutex_t		lock;		/* Mutex for memory handle */
368 	ldc_mstatus_t		status;		/* Memory map status */
369 
370 	uint8_t			mtype;		/* Type of sharing */
371 	uint8_t			perm;		/* Access permissions */
372 	boolean_t		myshadow;	/* TRUE=alloc'd shadow mem */
373 
374 	ldc_chan_t		*ldcp;		/* Pointer to channel struct */
375 	ldc_memseg_t		*memseg;	/* Bound memory segment */
376 	struct ldc_mhdl		*next;		/* Next memory handle */
377 } ldc_mhdl_t;
378 
379 /*
380  * LDC Descriptor ring; linked per channel (ch_next) and overall (next)
381  */
382 
383 typedef struct ldc_dring {
384 	kmutex_t		lock;		/* Desc ring lock */
385 	ldc_mstatus_t		status;		/* Desc ring status */
386 
387 	uint32_t		dsize;		/* Descriptor size */
388 	uint32_t		length;		/* Descriptor ring length */
389 	uint64_t		size;		/* Desc ring size (in bytes) */
390 	caddr_t			base;		/* Descriptor ring base addr */
391 
392 	ldc_chan_t		*ldcp;		/* Pointer to bound channel */
393 	ldc_mem_handle_t	mhdl;		/* Mem handle to desc ring */
394 
395 	struct ldc_dring	*ch_next;	/* Next dring in channel */
396 	struct ldc_dring 	*next;		/* Next dring overall */
397 
398 } ldc_dring_t;
399 
400 
401 /*
402  * Channel specific information is kept in a separate
403  * structure. These are then stored in an array indexed
404  * by the channel number. NOTE(review): 'next' below implies a list.
405  */
406 struct ldc_chan {
407 	ldc_chan_t	*next;		/* Next channel */
408 
409 	kmutex_t	lock;		/* Channel lock */
410 	uint64_t	id;		/* Channel ID */
411 	ldc_status_t	status;		/* Channel status */
412 	uint32_t	tstate;		/* Channel transport state */
413 	uint32_t	hstate;		/* Channel transport handshake state */
414 
415 	ldc_dev_t	devclass;	/* Associated device class */
416 	uint64_t	devinst;	/* Associated device instance */
417 	ldc_mode_t	mode;		/* Channel mode */
418 
419 	uint64_t	mtu;		/* Max TU size (streaming for now) */
420 
421 	ldc_ver_t	version;	/* Channel version */
422 	uint32_t	next_vidx;	/* Next version to match */
423 
424 	uint_t		(*cb)(uint64_t event, caddr_t arg);
425 	caddr_t		cb_arg;		/* Channel callback and arg */
426 	boolean_t	cb_inprogress;	/* Channel callback in progress */
427 	boolean_t	cb_enabled;	/* Channel callbacks are enabled */
428 
429 	boolean_t	tx_intr_pending; /* TRUE if Tx interrupts are pending */
430 	boolean_t	rx_intr_pending; /* TRUE if Rx interrupts are pending */
431 
432 	kmutex_t	tx_lock;	/* Transmit lock */
433 	uint64_t	tx_q_entries;	/* Num entries in transmit queue */
434 	uint64_t	tx_q_va;	/* Virtual addr of transmit queue */
435 	uint64_t	tx_q_ra;	/* Real addr of transmit queue */
436 	uint64_t	tx_head;	/* Tx queue head */
437 	uint64_t	tx_ackd_head;	/* Tx queue ACKd head (Reliable) */
438 	uint64_t	tx_tail;	/* Tx queue tail */
439 
440 	uint64_t	rx_q_entries;	/* Num entries in receive queue */
441 	uint64_t	rx_q_va;	/* Virtual addr of receive queue */
442 	uint64_t	rx_q_ra;	/* Real addr of receive queue */
443 
444 	uint64_t	link_state;	/* Underlying HV channel state */
445 
446 	ldc_mtbl_t	*mtbl;		/* Memory table used by channel */
447 	ldc_mhdl_t	*mhdl_list;	/* List of memory handles */
448 	kmutex_t	mlist_lock;	/* Mem handle list lock */
449 
450 	ldc_dring_t	*exp_dring_list; /* Exported desc ring list */
451 	kmutex_t	exp_dlist_lock;	/* Lock for exported desc ring list */
452 	ldc_dring_t	*imp_dring_list; /* Imported desc ring list */
453 	kmutex_t	imp_dlist_lock;	/* Lock for imported desc ring list */
454 
455 	uint8_t		pkt_payload;	/* Size of packet payload */
456 
457 	uint32_t	last_msg_snt;	/* Seqid of last packet sent */
458 	uint32_t	last_ack_rcd;	/* Seqid of last ACK recd */
459 	uint32_t	last_msg_rcd;	/* Seqid of last packet received */
460 
461 	uint32_t	stream_remains;	/* Number of bytes in stream */
462 					/* packet buffer */
463 	uint32_t	stream_offset;	/* Offset into packet buffer for */
464 					/* next read */
465 	uint8_t		*stream_bufferp; /* Stream packet buffer */
466 	/* Per-channel read and write function pointers */
467 	int		(*read_p)(ldc_chan_t *ldcp, caddr_t bufferp,
468 				size_t *sizep);
469 	int		(*write_p)(ldc_chan_t *ldcp, caddr_t bufferp,
470 				size_t *sizep);
471 };
472 
473 
474 /*
475  * LDC module soft state structure: module-wide channel and dring lists
476  */
477 typedef struct ldc_soft_state {
478 	kmutex_t 	lock;		/* Protects ldc_soft_state_t */
479 	ldc_cnex_t	cinfo;		/* channel nexus info */
480 	uint64_t	channel_count;	/* Number of channels */
481 	uint64_t	channels_open;	/* Number of open channels */
482 	ldc_chan_t 	*chan_list;	/* List of LDC endpoints */
483 	ldc_dring_t	*dring_list;	/* Descriptor rings (for export) */
484 
485 	kmem_cache_t	*memhdl_cache;	/* Memory handle cache */
486 	kmem_cache_t	*memseg_cache;	/* Memory segment cache */
487 } ldc_soft_state_t;
488 
489 #ifdef __cplusplus
490 }
491 #endif
492 
493 #endif /* _LDC_IMPL_H */
494