xref: /titanic_51/usr/src/uts/sun4v/sys/vdc.h (revision 2a9459bdd821c1cf59590a7a9069ac9c591e8a6b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #ifndef	_VDC_H
28 #define	_VDC_H
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 /*
33  * Virtual disk client implementation definitions
34  */
35 
36 #include <sys/sysmacros.h>
37 #include <sys/note.h>
38 
39 #include <sys/ldc.h>
40 #include <sys/vio_mailbox.h>
41 #include <sys/vdsk_mailbox.h>
42 #include <sys/vdsk_common.h>
43 
44 #ifdef	__cplusplus
45 extern "C" {
46 #endif
47 
/* Driver name string of the virtual disk client. */
#define	VDC_DRIVER_NAME		"vdc"
49 
/*
 * Initialisation flags.  Every flag occupies a bit of its own so that the
 * flags can be OR'ed together to indicate which parts of the vdc driver
 * are initialised.
 */
#define	VDC_SOFT_STATE		(1 << 0)
#define	VDC_LOCKS		(1 << 1)
#define	VDC_MINOR		(1 << 2)
#define	VDC_THREAD		(1 << 3)
#define	VDC_LDC			(1 << 4)
#define	VDC_LDC_INIT		(1 << 5)
#define	VDC_LDC_CB		(1 << 6)
#define	VDC_LDC_OPEN		(1 << 7)
#define	VDC_DRING_INIT		(1 << 8)  /* DRing was created */
#define	VDC_DRING_BOUND		(1 << 9)  /* DRing was bound to an LDC channel */
#define	VDC_DRING_LOCAL		(1 << 10) /* local private DRing was allocated */
#define	VDC_DRING_ENTRY		(1 << 11) /* >= 1 DRing entry was initialised */

/* All of the DRing related flags combined. */
#define	VDC_DRING							\
	(VDC_DRING_INIT | VDC_DRING_BOUND | VDC_DRING_LOCAL | VDC_DRING_ENTRY)

#define	VDC_HANDSHAKE		(1 << 12) /* a handshake is in progress */
#define	VDC_HANDSHAKE_STOP	(1 << 13) /* stop further handshakes */
69 
/*
 * Names of the device node properties created by vdc.
 * (The capitalised versions of these properties are used because they
 * are 64-bit.)
 */
#define	VDC_NBLOCKS_PROP_NAME		"Nblocks"
#define	VDC_SIZE_PROP_NAME		"Size"
76 
/*
 * Names of the MD (machine description) nodes and properties used by vdc.
 */
#define	VDC_MD_CHAN_NAME		"channel-endpoint"
#define	VDC_MD_VDEV_NAME		"virtual-device"
#define	VDC_MD_PORT_NAME		"virtual-device-port"
#define	VDC_MD_DISK_NAME		"disk"
#define	VDC_MD_CFG_HDL			"cfg-handle"
#define	VDC_MD_TIMEOUT			"vdc-timeout"
#define	VDC_MD_ID			"id"
87 
/*
 * Actions to be carried out when processing the sequence ID of a message
 * received from the vDisk server.  The function verifying the sequence
 * number checks the 'seq_num_xxx' fields in the soft state and returns
 * whether the message should be processed (VDC_SEQ_NUM_TODO) or whether
 * it was previously processed (VDC_SEQ_NUM_SKIP).
 */
#define	VDC_SEQ_NUM_INVALID		(-1)	/* Error */
#define	VDC_SEQ_NUM_SKIP		0	/* Request already processed */
#define	VDC_SEQ_NUM_TODO		1	/* Request needs processing */
98 
/*
 * Minor number layout.
 * (NOTE: Uses the same format and definitions as the sd(7D) driver)
 *
 * The low VDCUNIT_SHIFT bits of the minor number hold the partition (slice)
 * number, selected by VDCPART_MASK; the bits above them hold the unit
 * (instance) number.
 */
#define	VDCUNIT_SHIFT	3
#define	VDCPART_MASK	7

/*
 * Macros to get the UNIT and PART number out of a dev_t.
 */
#define	VDCUNIT(dev)	(getminor((dev)) >> VDCUNIT_SHIFT)
#define	VDCPART(dev)	(getminor((dev)) & VDCPART_MASK)

/*
 * Build a minor number from an instance number and a slice number.
 *
 * Both arguments are parenthesised so that expressions using operators of
 * lower precedence than '<<' or '|' (e.g. 'a | b' or 'c ? x : y') expand
 * to the intended value.
 */
#define	VD_MAKE_DEV(instance, minor)	\
	(((instance) << VDCUNIT_SHIFT) | (minor))
113 
/*
 * Tunables controlling how long to wait before timing out and how many
 * retries to attempt before giving up when communicating with vds.
 *
 * These values need to be sufficiently large so that a guest can survive
 * the reboot of the service domain.
 */
#define	VDC_RETRIES		10

/* Minimum timeout in microseconds (30 seconds). */
#define	VDC_USEC_TIMEOUT_MIN	(MICROSEC * 30)
124 
/*
 * This macro yields the lbolt value at which a wait by the vdc driver
 * should time out: the current ddi_get_lbolt() value plus the wait time.
 * The 'timeout' parameter specifies the wait time in Hz.  The 'mul'
 * parameter is a multiplier which allows a backoff to be implemented
 * (e.g. using the retry number as a multiplier) so that the wait time gets
 * longer if there was no response on the previous retry.  A 'mul' smaller
 * than 1 is treated as 1.
 */
#define	VD_GET_TIMEOUT_HZ(timeout, mul)	\
	(((timeout) * MAX(1, (mul))) + ddi_get_lbolt())
135 
/*
 * Macros to manipulate Descriptor Ring variables in the soft state
 * structure.
 */

/*
 * Yield the current request ID of 'vdc' and post-increment the stored value.
 */
#define	VDC_GET_NEXT_REQ_ID(vdc)	((vdc)->req_id++)

/*
 * Yield a pointer to the public descriptor ring entry at index 'idx',
 * i.e. the address 'idx' * dring_entry_size bytes past the start of the
 * descriptor ring memory (dring_mem_info.vaddr).
 *
 * 'idx' is parenthesised so that an expression such as 'i + 1' is scaled
 * as a whole, and the whole expansion is parenthesised so that the result
 * can be used directly with '->' or other postfix operators.  Note that
 * 'vdc' is evaluated twice.
 */
#define	VDC_GET_DRING_ENTRY_PTR(vdc, idx)				\
	((vd_dring_entry_t *)(uintptr_t)((vdc)->dring_mem_info.vaddr +	\
	    ((idx) * (vdc)->dring_entry_size)))
145 
/*
 * Mark the public descriptor ring entry at index 'idx' as free by setting
 * its header dstate to VIO_DESC_FREE.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement (e.g. in an unbraced if/else); the invocation must be
 * terminated with a semicolon.  Both arguments are parenthesised and are
 * evaluated more than once, so they must be free of side effects.
 */
#define	VDC_MARK_DRING_ENTRY_FREE(vdc, idx)				\
	do {								\
		vd_dring_entry_t *_dep;					\
		ASSERT((vdc) != NULL);					\
		ASSERT((idx) < (vdc)->dring_len);			\
		ASSERT((vdc)->dring_mem_info.vaddr != NULL);		\
		_dep = (vd_dring_entry_t *)(uintptr_t)			\
		    ((vdc)->dring_mem_info.vaddr +			\
		    ((idx) * (vdc)->dring_entry_size));			\
		ASSERT(_dep != NULL);					\
		_dep->hdr.dstate = VIO_DESC_FREE;			\
		_NOTE(CONSTANTCONDITION)				\
	} while (0)
158 
/*
 * Initialise the Session ID and Sequence Num in the DRing msg.
 *
 * 'dmsg' is the message structure itself (not a pointer to it); 'vdc' is a
 * pointer to the soft state supplying session_id and seq_num.
 *
 * The statements are wrapped in do { } while (0) so that the macro acts as
 * a single statement; without it only the ASSERT would be governed by an
 * unbraced 'if'.  The invocation must be terminated with a semicolon.
 */
#define	VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdc)				\
	do {								\
		ASSERT((vdc) != NULL);					\
		(dmsg).tag.vio_sid = (vdc)->session_id;			\
		(dmsg).seq_num = (vdc)->seq_num;			\
		_NOTE(CONSTANTCONDITION)				\
	} while (0)
164 
/*
 * States of the read thread.
 */
typedef enum vdc_rd_state {
	VDC_READ_IDLE = 0,		/* idle - connection is not up */
	VDC_READ_WAITING = 1,		/* waiting for data */
	VDC_READ_PENDING = 2,		/* pending data available for read */
	VDC_READ_RESET = 3		/* channel was reset - stop reads */
} vdc_rd_state_t;
174 
/*
 * States of the connection between vdc and vds.
 */
typedef enum vdc_state {
	VDC_STATE_INIT = 0,		/* device is initialized */
	VDC_STATE_INIT_WAITING = 1,	/* waiting for ldc connection */
	VDC_STATE_NEGOTIATE = 2,	/* doing handshake negotiation */
	VDC_STATE_HANDLE_PENDING = 3,	/* handle requests in backup dring */
	VDC_STATE_RUNNING = 4,		/* running and accepting requests */
	VDC_STATE_DETACH = 5,		/* detaching */
	VDC_STATE_RESETTING = 6		/* resetting connection with vds */
} vdc_state_t;
187 
/*
 * Lifecycle states of a vdc instance.
 */
typedef enum vdc_lc_state {
	VDC_LC_ATTACHING = 0,	/* driver is attaching */
	VDC_LC_ONLINE = 1,	/* driver is attached and online */
	VDC_LC_DETACHING = 2	/* driver is detaching */
} vdc_lc_state_t;
196 
197 /*
198  * Local Descriptor Ring entry
199  *
200  * vdc creates a Local (private) descriptor ring the same size as the
201  * public descriptor ring it exports to vds.
202  */
203 
/* Direction of the data transfer described by a descriptor. */
typedef enum {
	VIO_read_dir = 0,	/* read data from server */
	VIO_write_dir = 1,	/* write data to server */
	VIO_both_dir = 2	/* transfer both in and out in same buffer */
} vio_desc_direction_t;
209 
/* Kind of operation a descriptor was queued for. */
typedef enum {
	CB_STRATEGY = 0,	/* non-blocking strategy call */
	CB_SYNC = 1		/* synchronous operation */
} vio_cb_type_t;
214 
215 typedef struct vdc_local_desc {
216 	boolean_t		is_free;	/* local state - inuse or not */
217 
218 	int			operation;	/* VD_OP_xxx to be performed */
219 	caddr_t			addr;		/* addr passed in by consumer */
220 	int			slice;
221 	diskaddr_t		offset;		/* disk offset */
222 	size_t			nbytes;
223 	vio_cb_type_t		cb_type;	/* operation type blk/nonblk */
224 	void			*cb_arg;	/* buf passed to strategy() */
225 	vio_desc_direction_t	dir;		/* direction of transfer */
226 
227 	caddr_t			align_addr;	/* used if addr non-aligned */
228 	ldc_mem_handle_t	desc_mhdl;	/* Mem handle of buf */
229 	vd_dring_entry_t	*dep;		/* public Dring Entry Pointer */
230 
231 } vdc_local_desc_t;
232 
233 /*
234  * vdc soft state structure
235  */
236 typedef struct vdc {
237 
238 	kmutex_t	lock;		/* protects next 2 sections of vars */
239 	kcondvar_t	running_cv;	/* signal when upper layers can send */
240 	kcondvar_t	initwait_cv;	/* signal when ldc conn is up */
241 	kcondvar_t	dring_free_cv;	/* signal when desc is avail */
242 	kcondvar_t	membind_cv;	/* signal when mem can be bound */
243 	boolean_t	self_reset;
244 
245 	int		initialized;	/* keeps track of what's init'ed */
246 	vdc_lc_state_t	lifecycle;	/* Current state of the vdc instance */
247 
248 	int		hshake_cnt;	/* number of failed handshakes */
249 	uint8_t		open[OTYPCNT];	/* mask of opened slices */
250 	uint8_t		open_excl;	/* mask of exclusively opened slices */
251 	ulong_t		open_lyr[V_NUMPAR]; /* number of layered opens */
252 	int		dkio_flush_pending; /* # outstanding DKIO flushes */
253 	int		validate_pending; /* # outstanding validate request */
254 	vd_disk_label_t vdisk_label; 	/* label type of device/disk imported */
255 	struct vtoc	*vtoc;		/* structure to store VTOC data */
256 	struct dk_geom	*geom;		/* structure to store geometry data */
257 
258 	kthread_t	*msg_proc_thr;	/* main msg processing thread */
259 
260 	kmutex_t	read_lock;	/* lock to protect read */
261 	kcondvar_t	read_cv;	/* cv to wait for READ events */
262 	vdc_rd_state_t	read_state;	/* current read state */
263 
264 	uint32_t	sync_op_cnt;	/* num of active sync operations */
265 	boolean_t	sync_op_pending; /* sync operation is pending */
266 	boolean_t	sync_op_blocked; /* blocked waiting to do sync op */
267 	uint32_t	sync_op_status;	/* status of sync operation */
268 	kcondvar_t	sync_pending_cv; /* cv wait for sync op to finish */
269 	kcondvar_t	sync_blocked_cv; /* cv wait for other syncs to finish */
270 
271 	uint64_t	session_id;	/* common ID sent with all messages */
272 	uint64_t	seq_num;	/* most recent sequence num generated */
273 	uint64_t	seq_num_reply;	/* Last seq num ACK/NACK'ed by vds */
274 	uint64_t	req_id;		/* Most recent Request ID generated */
275 	uint64_t	req_id_proc;	/* Last request ID processed by vdc */
276 	vdc_state_t	state;		/* Current disk client-server state */
277 
278 	dev_info_t	*dip;		/* device info pointer */
279 	int		instance;	/* driver instance number */
280 
281 	vio_ver_t	ver;		/* version number agreed with server */
282 	vd_disk_type_t	vdisk_type;	/* type of device/disk being imported */
283 	uint32_t	vdisk_media;	/* physical media type of vDisk */
284 	uint64_t	vdisk_size;	/* device size in blocks */
285 	uint64_t	max_xfer_sz;	/* maximum block size of a descriptor */
286 	uint64_t	block_size;	/* device block size used */
287 	uint64_t	operations;	/* bitmask of ops. server supports */
288 	struct dk_cinfo	*cinfo;		/* structure to store DKIOCINFO data */
289 	struct dk_minfo	*minfo;		/* structure for DKIOCGMEDIAINFO data */
290 	ddi_devid_t	devid;		/* device id */
291 	uint64_t	ctimeout;	/* connection timeout in seconds */
292 	boolean_t	ctimeout_reached; /* connection timeout has expired */
293 
294 	ldc_mem_info_t		dring_mem_info;		/* dring information */
295 	uint_t			dring_curr_idx;		/* current index */
296 	uint32_t		dring_len;		/* dring length */
297 	uint32_t		dring_max_cookies;	/* dring max cookies */
298 	uint32_t		dring_cookie_count;	/* num cookies */
299 	uint32_t		dring_entry_size;	/* descriptor size */
300 	ldc_mem_cookie_t 	*dring_cookie;		/* dring cookies */
301 	uint64_t		dring_ident;		/* dring ident */
302 
303 	uint64_t		threads_pending; 	/* num of threads */
304 
305 	vdc_local_desc_t	*local_dring;		/* local dring */
306 	vdc_local_desc_t	*local_dring_backup;	/* local dring backup */
307 	int			local_dring_backup_tail; /* backup dring tail */
308 	int			local_dring_backup_len;	/* backup dring len */
309 
310 	uint64_t		ldc_id;			/* LDC channel id */
311 	ldc_status_t		ldc_state;		/* LDC channel state */
312 	ldc_handle_t		ldc_handle;		/* LDC handle */
313 	ldc_dring_handle_t	ldc_dring_hdl;		/* LDC dring handle */
314 } vdc_t;
315 
/*
 * Debugging macros
 *
 * DMSG(_vdc, err_level, format, ...)
 *	Log a message for the instance '_vdc' via cmn_err(CE_CONT) when
 *	vdc_msglevel is greater than 'err_level' and the bit corresponding
 *	to the instance number is set in vdc_matchinst.  The message is
 *	prefixed with the instance number, the current thread pointer and
 *	the name of the calling function.
 *
 * DMSGX(err_level, format, ...)
 *	Same as DMSG but without an instance; only vdc_msglevel is checked.
 *
 * VDC_DUMP_DRING_MSG(dmsgp)
 *	Log the seq_num, start_idx, end_idx and dring_ident fields of the
 *	DRing message 'dmsgp' points to.
 *
 * 'format' must be a string literal and at least one argument has to follow
 * it, since __VA_ARGS__ is placed after a comma.  All three macros expand
 * to a single statement without a trailing semicolon, so invocations must
 * be terminated with ';' and can safely be used in unbraced if/else bodies.
 * In non-DEBUG builds they expand to nothing.
 */
#ifdef DEBUG
extern int	vdc_msglevel;
extern uint64_t	vdc_matchinst;

#define	DMSG(_vdc, err_level, format, ...)				\
	do {								\
		if (vdc_msglevel > (err_level) &&			\
		    (vdc_matchinst & (1ull << (_vdc)->instance)))	\
			cmn_err(CE_CONT, "?[%d,t@%p] %s: " format,	\
			    (_vdc)->instance, (void *)curthread,	\
			    __func__, __VA_ARGS__);			\
		_NOTE(CONSTANTCONDITION)				\
	} while (0)

#define	DMSGX(err_level, format, ...)					\
	do {								\
		if (vdc_msglevel > (err_level))				\
			cmn_err(CE_CONT, "?%s: " format,		\
			    __func__, __VA_ARGS__);			\
		_NOTE(CONSTANTCONDITION)				\
	} while (0)

#define	VDC_DUMP_DRING_MSG(dmsgp)					\
		DMSGX(0, "sq:%lu start:%d end:%d ident:%lu\n",		\
		    (dmsgp)->seq_num, (dmsgp)->start_idx,		\
		    (dmsgp)->end_idx, (dmsgp)->dring_ident)

#else	/* !DEBUG */
#define	DMSG(_vdc, err_level, format, ...)
#define	DMSGX(err_level, format, ...)
#define	VDC_DUMP_DRING_MSG(dmsgp)

#endif	/* !DEBUG */
351 
352 #ifdef	__cplusplus
353 }
354 #endif
355 
356 #endif	/* _VDC_H */
357