xref: /illumos-gate/usr/src/uts/common/xen/io/xdf.h (revision a74f7440e9d4ba2cf59e6cbfc445479a28170f2a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 
28 #ifndef _SYS_XDF_H
29 #define	_SYS_XDF_H
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #ifdef __cplusplus
34 extern "C" {
35 #endif
36 
37 
/*
 * Ring sizes for the native blkif shared ring and for the explicit 32-bit
 * and 64-bit x86 ring layouts.  Each one applies __RING_SIZE() to a NULL
 * pointer of the corresponding shared-ring type and to PAGESIZE.
 *
 * NOTE(review): presumably this is the number of request/response slots
 * that fit in a one-page shared ring -- confirm against Xen's io/ring.h.
 */
#define	BLKIF_RING_SIZE \
	__RING_SIZE((blkif_sring_t *)NULL, PAGESIZE)
#define	BLKIF_X86_32_RING_SIZE \
	__RING_SIZE((blkif_x86_32_sring_t *)NULL, PAGESIZE)
#define	BLKIF_X86_64_RING_SIZE \
	__RING_SIZE((blkif_x86_64_sring_t *)NULL, PAGESIZE)
44 
/*
 * VBDs have standard 512 byte blocks.
 * A single blkif_request can transfer up to 11 pages of data, 1 page/segment.
 *
 * XB_BSIZE follows DEV_BSIZE while XB_BSHIFT is hard-coded, so the two only
 * agree as long as DEV_BSIZE == (1 << XB_BSHIFT).
 *
 * XB_DTOB() turns a block number/count into a byte offset/count.  The shift
 * is performed in the type of the argument (there is no widening cast), so
 * callers should pass a type wide enough to hold the byte value.
 */
#define	XB_BSIZE	DEV_BSIZE
#define	XB_BMASK	(XB_BSIZE - 1)
#define	XB_BSHIFT	9
#define	XB_DTOB(bn)	((bn) << XB_BSHIFT)

/*
 * Transfer limits derived from the block size:
 *   XB_MAX_SEGLEN  largest single segment, 8 blocks
 *   XB_SEGOFFSET   XB_MAX_SEGLEN - 1 (presumably a mask for the byte offset
 *                  within a segment -- confirm against its users)
 *   XB_MAX_XFER    largest transfer one blkif_request can describe
 *   XB_MAXPHYS     XB_MAX_XFER multiplied by the number of ring slots
 */
#define	XB_MAX_SEGLEN	(8 * XB_BSIZE)
#define	XB_SEGOFFSET	(XB_MAX_SEGLEN - 1)
#define	XB_MAX_XFER	(XB_MAX_SEGLEN * BLKIF_MAX_SEGMENTS_PER_REQUEST)
#define	XB_MAXPHYS	(XB_MAX_XFER * BLKIF_RING_SIZE)
58 
59 
/*
 * Slice for absolute disk transaction.
 *
 * Hack Alert.  XB_SLICE_NONE is a magic value that can be written into the
 * b_private field of buf structures passed to xdf_strategy().  When present
 * it indicates that the I/O is using an absolute offset (i.e., the I/O is
 * not bound to any one partition).  This magic value is currently used by
 * the pv_cmdk driver.  This hack is shamelessly stolen from the sun4v vdc
 * driver, another virtual disk device driver.  (Although in the case of
 * vdc the hack is less egregious since it is self-contained within the
 * vdc driver, whereas here it is used as an interface between the pv_cmdk
 * driver and the xdf driver.)
 */
#define	XB_SLICE_NONE	0xFF
74 
/*
 * blkif status
 *
 * Connection state of one virtual block device instance; stored in
 * xdf_t.xdf_status.
 */
enum xdf_state {
	/*
	 * initial state
	 */
	XD_UNKNOWN,
	/*
	 * ring and evtchn allocated, xenbus state changed to
	 * XenbusStateInitialised, waiting for the backend to connect
	 */
	XD_INIT,
	/*
	 * backend's xenbus state has changed to XenbusStateConnected;
	 * this is the only state in which I/O is allowed
	 */
	XD_READY,
	/*
	 * vbd interface close request received from the backend.  No more
	 * I/O requests may be put into the ring buffer, while the interrupt
	 * handler is still allowed to run to finish any outstanding I/O
	 * requests.  The disconnect process is kicked off by changing the
	 * xenbus state to XenbusStateClosed.
	 */
	XD_CLOSING,
	/*
	 * disconnection process finished, both the backend's and the
	 * frontend's xenbus state have been changed to XenbusStateClosed;
	 * the instance can be detached
	 */
	XD_CLOSED,
	/*
	 * disconnection process finished, frontend is suspended
	 */
	XD_SUSPEND
};
110 
/*
 * Minor number layout: the low XDF_PSHIFT bits select the partition and
 * the remaining upper bits carry the instance number.
 *
 * 16 partitions + fdisk
 *
 *   XDF_PEXT         number of minor slots reserved per instance
 *   XDF_PMASK        mask that extracts the partition bits
 *   XDF_INST(m)      instance encoded in minor number m
 *   XDF_PART(m)      partition encoded in minor number m
 *   XDF_MINOR(i, m)  minor number for instance i, partition m
 */
#define	XDF_PSHIFT	6
#define	XDF_PEXT	(1 << XDF_PSHIFT)
#define	XDF_PMASK	(XDF_PEXT - 1)
#define	XDF_INST(m)	((m) >> XDF_PSHIFT)
#define	XDF_PART(m)	((m) & XDF_PMASK)
#define	XDF_MINOR(i, m) (((i) << XDF_PSHIFT) | (m))
120 
/*
 * one blkif_request_t will have one corresponding ge_slot_t
 * where we save those grant table refs used in this blkif_request_t
 *
 * the id of this ge_slot_t will also be put into 'id' field in
 * each blkif_request_t when sent out to the ring buffer.
 *
 * The per-field notes below are inferred from the field names and types;
 * confirm them against xdf.c before relying on them.
 */
typedef struct ge_slot {
	list_node_t	link;	/* list linkage (presumably xdf_gs_act) */
	domid_t		oeid;	/* presumably the other end's domain ID */
	struct v_req	*vreq;	/* vreq this slot is serving */
	int		isread;	/* presumably non-zero for a read request */
	grant_ref_t	ghead;	/* presumably head of the grant ref chain */
	int		ngrefs;	/* presumably the number of entries used in ge[] */
	grant_ref_t	ge[BLKIF_MAX_SEGMENTS_PER_REQUEST]; /* one per segment */
} ge_slot_t;
137 
/*
 * vbd I/O request
 *
 * An instance of this structure is bound to each buf passed to
 * the driver's strategy by setting the pointer into bp->av_back.
 * The id of this vreq will also be put into 'id' field in each
 * blkif_request_t when sent out to the ring buffer for one DMA
 * window of this buf.
 *
 * Vreq mainly contains DMA information for this buf. In one vreq/buf,
 * there could be more than one DMA window, each of which will be
 * mapped to one blkif_request_t/ge_slot_t. Ge_slot_t contains all grant
 * table entry information for this buf. The ge_slot_t for current DMA
 * window is pointed to by v_gs in vreq.
 *
 * So, grant table entries will only be alloc'ed when the DMA window is
 * about to be transferred via blkif_request_t to the ring buffer. And
 * they will be freed right after the blkif_response_t is seen. By this
 * means, we can make use of grant table entries more efficiently.
 *
 * Field notes marked "presumably" are inferred from names and types only;
 * confirm them against xdf.c.
 */
typedef struct v_req {
	list_node_t	v_link;		/* list linkage (presumably xdf_vreq_act) */
	int		v_status;	/* setup progress, one of VREQ_* */
	buf_t		*v_buf;		/* the buf this vreq is bound to */
	ddi_dma_handle_t v_dmahdl;	/* DMA handle for the transfer */
	ddi_dma_cookie_t v_dmac;	/* presumably the current DMA cookie */
	uint_t		v_ndmacs;	/* presumably cookies in current window */
	uint_t		v_dmaw;		/* presumably current DMA window index */
	uint_t		v_ndmaws;	/* presumably total DMA windows */
	uint_t		v_nslots;	/* presumably ge_slots in use */
	ge_slot_t	*v_gs;		/* ge_slot for the current DMA window */
	uint64_t	v_blkno;	/* presumably the starting block number */
	ddi_acc_handle_t v_align;	/* presumably access handle for v_abuf */
	caddr_t		v_abuf;		/* presumably buffer for unaligned I/O */
	ddi_dma_handle_t v_memdmahdl;	/* presumably DMA handle for v_abuf */
	uint8_t		v_flush_diskcache; /* presumably a cache-flush request */
} v_req_t;
175 
/*
 * Status set and checked in vreq->v_status by vreq_setup()
 *
 * These values help us to continue the vreq setup work from the last
 * failure point, instead of starting from scratch after each failure.
 *
 * Note that they are consecutive ordinals, not independent bit flags
 * (e.g. VREQ_MEMDMAHDL_ALLOCED == 0x3 == VREQ_INIT_DONE |
 * VREQ_DMAHDL_ALLOCED), so v_status has to be compared for equality
 * rather than tested with a bitwise AND.
 */
#define	VREQ_INIT		0x0
#define	VREQ_INIT_DONE		0x1
#define	VREQ_DMAHDL_ALLOCED	0x2
#define	VREQ_MEMDMAHDL_ALLOCED	0x3
#define	VREQ_DMAMEM_ALLOCED	0x4
#define	VREQ_DMABUF_BOUND	0x5
#define	VREQ_GS_ALLOCED		0x6
#define	VREQ_DMAWIN_DONE	0x7
190 
/*
 * virtual block device per-instance softstate
 *
 * Three mutexes are declared here: xdf_iostat_lk (iostat pointer),
 * xdf_dev_lk (I/O path) and xdf_cb_lk (event handling path).  The lock
 * ordering between them is not visible in this header.
 */
typedef struct xdf {
	dev_info_t	*xdf_dip;
	ddi_iblock_cookie_t xdf_ibc; /* mutex iblock cookie */
	domid_t		xdf_peer; /* otherend's dom ID */
	xendev_ring_t	*xdf_xb_ring; /* I/O ring buffer */
	ddi_acc_handle_t xdf_xb_ring_hdl; /* access handle for ring buffer */
	list_t		xdf_vreq_act; /* active vreq list */
	list_t		xdf_gs_act; /* active grant table slot list */
	buf_t		*xdf_f_act; /* active buf list head */
	buf_t		*xdf_l_act; /* active buf list tail */
	enum xdf_state	xdf_status; /* status of this virtual disk */
	ulong_t		xdf_vd_open[OTYPCNT];
	ulong_t		xdf_vd_lyropen[XDF_PEXT];
	ulong_t		xdf_vd_exclopen;
	kmutex_t	xdf_iostat_lk; /* mutex lock for the iostat ptr */
	kmutex_t	xdf_dev_lk; /* mutex lock for I/O path */
	kmutex_t	xdf_cb_lk; /* mutex lock for event handling path */
	kcondvar_t	xdf_dev_cv; /* cv used in I/O path */
	uint_t		xdf_xdev_info; /* disk info from backend xenstore */
	diskaddr_t	xdf_xdev_nblocks; /* total size in block */
	cmlb_geom_t	xdf_pgeom;
	kstat_t		*xdf_xdev_iostat; /* NULL when no I/O kstat exists */
	cmlb_handle_t	xdf_vd_lbl;
	ddi_softintr_t	xdf_softintr_id;
	timeout_id_t	xdf_timeout_id;
	struct gnttab_free_callback xdf_gnt_callback;
	int		xdf_feature_barrier;
	int		xdf_flush_supported;
	int		xdf_wce;
	char		*xdf_flush_mem;
	char		*xdf_cache_flush_block;
	int		xdf_evtchn;
#ifdef	DEBUG
	int		xdf_dmacallback_num; /* counter behind SETDMACBON/OFF */
#endif
} xdf_t;
230 
/* Fetch the v_req_t bound to a buf; the pointer is kept in bp->av_back. */
#define	BP2VREQ(bp)	((v_req_t *)((bp)->av_back))
232 
/*
 * VBD I/O requests must be aligned on a 512-byte boundary and specify
 * a transfer size which is a multiple of 512 bytes.
 *
 * ALIGNED_XFER(bp) is 1 when both the buffer address and the byte count
 * of the buf have no bits set under XB_BMASK, else 0.
 *
 * U_INVAL(u) is 1 when the uio offset or the length of the first iovec
 * has bits set under XB_BMASK, else 0.  Only uio_iov[0] is examined, and
 * the iovec is not dereferenced when the offset is already misaligned.
 */
#define	ALIGNED_XFER(bp) \
	((((uintptr_t)((bp)->b_un.b_addr) | (bp)->b_bcount) & XB_BMASK) == 0)

#define	U_INVAL(u) \
	((((u)->uio_loffset & (offset_t)(XB_BMASK)) != 0) || \
	(((u)->uio_iov->iov_len & (offset_t)(XB_BMASK)) != 0))
243 
/*
 * Wrap pa_to_ma() so that xdf can also run in dom0: when
 * DOMAIN_IS_INITDOMAIN(xen_info) is true the address is returned
 * unchanged, otherwise it is passed through pa_to_ma().
 *
 * The argument is parenthesized at every use so that any expression can
 * be passed safely; it is evaluated exactly once, in whichever arm of the
 * conditional is selected.
 */
#define	PATOMA(addr)	\
	(DOMAIN_IS_INITDOMAIN(xen_info) ? (addr) : pa_to_ma(addr))
246 
/*
 * Predicates on the disk info word obtained from the backend (XD_IS_*)
 * and on buf flags (IS_READ, IS_ERROR).
 *
 * Each macro yields the raw masked bit(s), not a normalized 0/1, so the
 * result should only be used in a boolean context.
 */
#define	XD_IS_RO(vbd)	((vbd)->xdf_xdev_info & VDISK_READONLY)
#define	XD_IS_CD(vbd)	((vbd)->xdf_xdev_info & VDISK_CDROM)
#define	XD_IS_RM(vbd)	((vbd)->xdf_xdev_info & VDISK_REMOVABLE)
#define	IS_READ(bp)	((bp)->b_flags & B_READ)
#define	IS_ERROR(bp)	((bp)->b_flags & B_ERROR)
252 
/*
 * Account one buf in the instance's I/O kstat.
 *
 * Does nothing when vdp->xdf_xdev_iostat is NULL.  Otherwise the number
 * of bytes actually moved (b_bcount - b_resid) is added to nread or
 * nwritten and the matching reads/writes counter is bumped, depending on
 * whether B_READ is set in the buf's flags.  Both arguments are evaluated
 * more than once, so they must be free of side effects.  No locking is
 * done here.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement: it must be followed by a semicolon and can be used as
 * the unbraced body of an if/else without capturing the caller's else.
 */
#define	XDF_UPDATE_IO_STAT(vdp, bp)					\
	do {								\
		if ((vdp)->xdf_xdev_iostat != NULL) {			\
			kstat_io_t *kip =				\
			    KSTAT_IO_PTR((vdp)->xdf_xdev_iostat);	\
			size_t n_done = (bp)->b_bcount - (bp)->b_resid;	\
			if ((bp)->b_flags & B_READ) {			\
				kip->reads++;				\
				kip->nread += n_done;			\
			} else {					\
				kip->writes++;				\
				kip->nwritten += n_done;		\
			}						\
		}							\
	} while (0)
265 
/*
 * Debug support.
 *
 * xdfdebug is a mask of the *_DBG category bits; it is defined outside
 * this header.  In DEBUG builds:
 *   DPRINTF(flag, args)  calls prom_printf with the parenthesized argument
 *                        list 'args' when any bit of 'flag' is set in
 *                        xdfdebug
 *   SETDMACBON(vbd)      increments vbd->xdf_dmacallback_num
 *   SETDMACBOFF(vbd)     decrements vbd->xdf_dmacallback_num
 *   ISDMACBON(vbd)       true while that counter is positive
 * In non-DEBUG builds all four expand to nothing, so ISDMACBON() may only
 * appear where an empty expansion is acceptable.
 *
 * The statement-like DEBUG variants use do { } while (0) rather than a
 * bare { } block so that "if (x) DPRINTF(...); else ..." parses the same
 * way in DEBUG and non-DEBUG builds.  Always follow them with a semicolon.
 */
extern int xdfdebug;
#ifdef DEBUG
#define	DPRINTF(flag, args)	\
	do { if (xdfdebug & (flag)) prom_printf args; } while (0)
#define	SETDMACBON(vbd)		do { (vbd)->xdf_dmacallback_num++; } while (0)
#define	SETDMACBOFF(vbd)	do { (vbd)->xdf_dmacallback_num--; } while (0)
#define	ISDMACBON(vbd)		((vbd)->xdf_dmacallback_num > 0)
#else
#define	DPRINTF(flag, args)
#define	SETDMACBON(vbd)
#define	SETDMACBOFF(vbd)
#define	ISDMACBON(vbd)
#endif /* DEBUG */
278 
/*
 * Debug category bits, presumably the 'flag' values passed to DPRINTF()
 * and set in xdfdebug to enable the matching messages.  Bit 0x4 is not
 * assigned.
 */
#define	DDI_DBG		0x1
#define	DMA_DBG		0x2
#define	INTR_DBG	0x8
#define	IO_DBG		0x10
#define	IOCTL_DBG	0x20
#define	SUSRES_DBG	0x40
#define	LBL_DBG		0x80
286 
/*
 * Entry points that are only declared when this header is built as part
 * of the HVM PV driver (XPV_HVM_DRIVER).  They are defined elsewhere;
 * their argument and return conventions are not visible in this header.
 */
#if defined(XPV_HVM_DRIVER)
extern dev_info_t *xdf_hvm_hold(char *);
extern int xdf_hvm_connect(dev_info_t *);
extern int xdf_hvm_setpgeom(dev_info_t *, cmlb_geom_t *);
extern int xdf_kstat_create(dev_info_t *, char *, int);
extern void xdf_kstat_delete(dev_info_t *);
#endif /* XPV_HVM_DRIVER */
294 
295 #ifdef __cplusplus
296 }
297 #endif
298 
299 #endif	/* _SYS_XDF_H */
300