xref: /titanic_52/usr/src/uts/common/xen/io/xdf.h (revision 1b47e080b07ee427f2239a6564769802c9e5ac99)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 
28 #ifndef _SYS_XDF_H
29 #define	_SYS_XDF_H
30 
31 #pragma ident	"%Z%%M%	%I%	%E% SMI"
32 
33 #ifdef __cplusplus
34 extern "C" {
35 #endif
36 
37 
/*
 * Number of request slots in one blkif ring, derived via __RING_SIZE()
 * from the blkif_sring_t layout and PAGESIZE.
 */
#define	BLKIF_RING_SIZE	__RING_SIZE((blkif_sring_t *)NULL, PAGESIZE)

/*
 * VBD block geometry.
 *
 * VBDs have standard 512-byte blocks, so XB_BSHIFT is log2 of that size
 * and XB_BMASK isolates the offset within one block.
 *
 * XB_DTOB() turns a block number into a byte offset.  The shift is done
 * in the (promoted) type of the argument, so pass a type wide enough to
 * hold the resulting byte offset.
 */
#define	XB_BSIZE	DEV_BSIZE
#define	XB_BMASK	(XB_BSIZE - 1)
#define	XB_BSHIFT	9
#define	XB_DTOB(bn)	((bn) << XB_BSHIFT)

/*
 * Transfer limits.
 *
 * A single blkif_request can transfer up to 11 pages of data, one page
 * per segment:
 *   XB_MAX_SEGLEN - bytes carried by one segment (8 blocks)
 *   XB_SEGOFFSET  - mask for a byte offset within one segment
 *   XB_MAX_XFER   - bytes carried by one fully populated request
 *   XB_MAXPHYS    - bytes carried by a ring full of such requests
 */
#define	XB_MAX_SEGLEN	(8 * XB_BSIZE)
#define	XB_SEGOFFSET	(XB_MAX_SEGLEN - 1)
#define	XB_MAX_XFER	(XB_MAX_SEGLEN * BLKIF_MAX_SEGMENTS_PER_REQUEST)
#define	XB_MAXPHYS	(XB_MAX_XFER * BLKIF_RING_SIZE)
53 
/*
 * blkif status: the connection state of one virtual block device.
 */
enum xdf_state {
	/* Initial state. */
	XD_UNKNOWN = 0,

	/*
	 * The ring and evtchn have been allocated and the xenbus state has
	 * been changed to XenbusStateInitialised; waiting for the backend
	 * to connect.
	 */
	XD_INIT,

	/*
	 * The backend's xenbus state has changed to XenbusStateConnected.
	 * This is the only state in which I/O is allowed.
	 */
	XD_READY,

	/*
	 * A vbd interface close request was received from the backend.
	 * No more I/O requests may be put into the ring buffer, but the
	 * interrupt handler is still allowed to run so that outstanding
	 * I/O requests can finish.  The disconnect process is kicked off
	 * by changing the xenbus state to XenbusStateClosed.
	 */
	XD_CLOSING,

	/*
	 * The disconnection process has finished: the xenbus state of both
	 * backend and frontend has been changed to XenbusStateClosed, and
	 * the device can be detached.
	 */
	XD_CLOSED,

	/*
	 * The disconnection process has finished and the frontend is
	 * suspended.
	 */
	XD_SUSPEND
};
89 
/*
 * Minor number layout: 16 partitions + fdisk.
 *
 * The low XDF_PSHIFT bits of a minor number select the partition and the
 * remaining high bits select the instance, so every instance owns
 * XDF_PEXT consecutive minor numbers.
 *
 *   XDF_MINOR(i, m) - build a minor number from instance i and partition m
 *   XDF_INST(m)     - instance encoded in minor number m
 *   XDF_PART(m)     - partition encoded in minor number m
 */
#define	XDF_PSHIFT	6
#define	XDF_PEXT	(1 << XDF_PSHIFT)
#define	XDF_PMASK	(XDF_PEXT - 1)

#define	XDF_MINOR(i, m)	(((i) << XDF_PSHIFT) | (m))
#define	XDF_INST(m)	((m) >> XDF_PSHIFT)
#define	XDF_PART(m)	((m) & XDF_PMASK)
99 
100 /*
101  * one blkif_request_t will have one corresponding ge_slot_t
102  * where we save those grant table refs used in this blkif_request_t
103  *
104  * the id of this ge_slot_t will also be put into 'id' field in
105  * each blkif_request_t when sent out to the ring buffer.
106  */
107 typedef struct ge_slot {
108 	list_node_t	link;
109 	domid_t		oeid;
110 	struct v_req	*vreq;
111 	int		isread;
112 	grant_ref_t	ghead;
113 	int		ngrefs;
114 	grant_ref_t	ge[BLKIF_MAX_SEGMENTS_PER_REQUEST];
115 } ge_slot_t;
116 
117 /*
118  * vbd I/O request
119  *
120  * An instance of this structure is bound to each buf passed to
121  * the driver's strategy by setting the pointer into bp->av_back.
122  * The id of this vreq will also be put into 'id' field in each
123  * blkif_request_t when sent out to the ring buffer for one DMA
124  * window of this buf.
125  *
126  * Vreq mainly contains DMA information for this buf. In one vreq/buf,
127  * there could be more than one DMA window, each of which will be
128  * mapped to one blkif_request_t/ge_slot_t. Ge_slot_t contains all grant
129  * table entry information for this buf. The ge_slot_t for current DMA
130  * window is pointed to by v_gs in vreq.
131  *
132  * So, grant table entries will only be alloc'ed when the DMA window is
133  * about to be transferred via blkif_request_t to the ring buffer. And
134  * they will be freed right after the blkif_response_t is seen. By this
135  * means, we can make use of grant table entries more efficiently.
136  */
137 typedef struct v_req {
138 	list_node_t	v_link;
139 	int		v_status;
140 	buf_t		*v_buf;
141 	ddi_dma_handle_t v_dmahdl;
142 	ddi_dma_cookie_t v_dmac;
143 	uint_t		v_ndmacs;
144 	uint_t		v_dmaw;
145 	uint_t		v_ndmaws;
146 	uint_t		v_nslots;
147 	ge_slot_t	*v_gs;
148 	uint64_t	v_blkno;
149 	ddi_acc_handle_t v_align;
150 	caddr_t		v_abuf;
151 	ddi_dma_handle_t v_memdmahdl;
152 	uint8_t		v_flush_diskcache;
153 } v_req_t;
154 
/*
 * Setup progress of a vreq, stored in and checked from vreq->v_status by
 * vreq_setup().
 *
 * The values are consecutive step numbers, not bit flags.  Recording the
 * last step reached lets vreq setup continue from the point of the last
 * failure instead of starting from scratch after each failure.
 */
#define	VREQ_INIT		0
#define	VREQ_INIT_DONE		1
#define	VREQ_DMAHDL_ALLOCED	2
#define	VREQ_MEMDMAHDL_ALLOCED	3
#define	VREQ_DMAMEM_ALLOCED	4
#define	VREQ_DMABUF_BOUND	5
#define	VREQ_GS_ALLOCED		6
#define	VREQ_DMAWIN_DONE	7
169 
170 /*
171  * virtual block device per-instance softstate
172  */
173 typedef struct xdf {
174 	dev_info_t	*xdf_dip;
175 	domid_t		xdf_peer; /* otherend's dom ID */
176 	xendev_ring_t	*xdf_xb_ring; /* I/O ring buffer */
177 	ddi_acc_handle_t xdf_xb_ring_hdl; /* access handler for ring buffer */
178 	list_t		xdf_vreq_act; /* active vreq list */
179 	list_t		xdf_gs_act; /* active grant table slot list */
180 	buf_t		*xdf_f_act; /* active buf list head */
181 	buf_t		*xdf_l_act; /* active buf list tail */
182 	enum xdf_state	xdf_status; /* status of this virtual disk */
183 	ulong_t		xdf_vd_open[OTYPCNT];
184 	ulong_t		xdf_vd_lyropen[XDF_PEXT];
185 	ulong_t		xdf_vd_exclopen;
186 	kmutex_t	xdf_dev_lk; /* mutex lock for I/O path */
187 	kmutex_t	xdf_cb_lk; /* mutex lock for event handling path */
188 	kcondvar_t	xdf_dev_cv; /* cv used in I/O path */
189 	uint_t		xdf_xdev_info; /* disk info from backend xenstore */
190 	diskaddr_t	xdf_xdev_nblocks; /* total size in block */
191 	kstat_t		*xdf_xdev_iostat;
192 	cmlb_handle_t	xdf_vd_lbl;
193 	ddi_softintr_t	xdf_softintr_id;
194 	timeout_id_t	xdf_timeout_id;
195 	struct gnttab_free_callback xdf_gnt_callback;
196 	int		xdf_feature_barrier;
197 	int		xdf_flush_supported;
198 	int		xdf_wce;
199 	char		*xdf_flush_mem;
200 	char		*xdf_cache_flush_block;
201 	int		xdf_evtchn;
202 #ifdef	DEBUG
203 	int		xdf_dmacallback_num;
204 #endif
205 } xdf_t;
206 
/* Fetch the v_req_t bound to a buf; the pointer is kept in bp->av_back. */
#define	BP2VREQ(bp)	((v_req_t *)((bp)->av_back))
208 
/*
 * VBD I/O requests must be aligned on a 512-byte boundary and specify a
 * transfer size which is a multiple of 512 bytes.
 *
 * ALIGNED_XFER(bp) is non-zero when both the data address and the byte
 * count of the buf satisfy that rule.
 *
 * U_INVAL(u) is non-zero when the uio violates it, i.e. when either the
 * file offset or the length of the uio's first iovec is not a multiple
 * of the block size.
 *
 * Both macros evaluate their argument more than once.
 */
#define	ALIGNED_XFER(bp)						\
	(!((uintptr_t)((bp)->b_un.b_addr) & XB_BMASK) &&		\
	!((bp)->b_bcount & XB_BMASK))

#define	U_INVAL(u)							\
	((((u)->uio_loffset & (offset_t)(XB_BMASK)) != 0) ||		\
	(((u)->uio_iov->iov_len & (offset_t)(XB_BMASK)) != 0))
219 
/*
 * Wrap pa_to_ma() so that xdf can also run in dom0: when
 * DOMAIN_IS_INITDOMAIN(xen_info) is true the address is used unchanged,
 * otherwise it is translated with pa_to_ma().
 *
 * The argument is parenthesized in the expansion and is evaluated exactly
 * once, whichever branch is taken.
 */
#define	PATOMA(addr)	\
	(DOMAIN_IS_INITDOMAIN(xen_info) ? (addr) : pa_to_ma(addr))
222 
/*
 * Attribute tests.
 *
 * Each macro yields the masked flag bits themselves (non-zero when the
 * attribute is set), not a normalized 0/1 value.
 */

/* virtual disk attributes, taken from the xdf_xdev_info word */
#define	XD_IS_RO(vbd)	((vbd)->xdf_xdev_info & VDISK_READONLY)
#define	XD_IS_CD(vbd)	((vbd)->xdf_xdev_info & VDISK_CDROM)
#define	XD_IS_RM(vbd)	((vbd)->xdf_xdev_info & VDISK_REMOVABLE)

/* buf attributes, taken from b_flags */
#define	IS_READ(bp)	((bp)->b_flags & B_READ)
#define	IS_ERROR(bp)	((bp)->b_flags & B_ERROR)
228 
/*
 * Account a finished buf in the instance's I/O kstat.
 *
 * Does nothing when the instance has no kstat (xdf_xdev_iostat is NULL).
 * Otherwise the number of bytes actually moved (b_bcount - b_resid) is
 * added to the read or write counters, chosen by B_READ in b_flags, and
 * the matching operation count is incremented.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely, followed by a semicolon, in an
 * unbraced if/else.  The local names carry a prefix so they cannot shadow
 * identifiers used in the arguments.  Both arguments are evaluated more
 * than once, so they must not have side effects.
 */
#define	XDF_UPDATE_IO_STAT(vdp, bp)					\
	do {								\
		if ((vdp)->xdf_xdev_iostat != NULL) {			\
			kstat_io_t *xuis_kip =				\
			    KSTAT_IO_PTR((vdp)->xdf_xdev_iostat);	\
			size_t xuis_done =				\
			    (bp)->b_bcount - (bp)->b_resid;		\
			if ((bp)->b_flags & B_READ) {			\
				xuis_kip->reads++;			\
				xuis_kip->nread += xuis_done;		\
			} else {					\
				xuis_kip->writes++;			\
				xuis_kip->nwritten += xuis_done;	\
			}						\
		}							\
	} while (0)
241 
/*
 * Debug support.
 *
 * xdfdebug is a mask of the *_DBG category bits.  In DEBUG builds:
 *
 *   DPRINTF(flag, args) - call prom_printf with the parenthesized
 *                         argument list 'args' when any bit of 'flag'
 *                         is set in xdfdebug
 *   SETDMACBON(vbd)     - increment xdf_dmacallback_num
 *   SETDMACBOFF(vbd)    - decrement xdf_dmacallback_num
 *   ISDMACBON(vbd)      - true while xdf_dmacallback_num is positive
 *
 * In non-DEBUG builds all four expand to nothing, so ISDMACBON() may
 * only be used where an empty expansion is acceptable.
 *
 * The statement-like DEBUG macros are wrapped in do { } while (0) so
 * that each is a single statement and behaves the same as its empty
 * non-DEBUG counterpart when followed by a semicolon in an unbraced
 * if/else.
 */
extern int xdfdebug;
#ifdef DEBUG
#define	DPRINTF(flag, args)						\
	do {								\
		if (xdfdebug & (flag))					\
			prom_printf args;				\
	} while (0)
#define	SETDMACBON(vbd)		do { (vbd)->xdf_dmacallback_num++; } while (0)
#define	SETDMACBOFF(vbd)	do { (vbd)->xdf_dmacallback_num--; } while (0)
#define	ISDMACBON(vbd)		((vbd)->xdf_dmacallback_num > 0)
#else
#define	DPRINTF(flag, args)
#define	SETDMACBON(vbd)
#define	SETDMACBOFF(vbd)
#define	ISDMACBON(vbd)
#endif /* DEBUG */
254 
/*
 * Debug category bits, tested against xdfdebug by DPRINTF().
 * Bit 0x04 is not assigned.
 */
#define	DDI_DBG		0x01
#define	DMA_DBG		0x02
#define	INTR_DBG	0x08
#define	IO_DBG		0x10
#define	IOCTL_DBG	0x20
#define	SUSRES_DBG	0x40
#define	LBL_DBG		0x80
262 
263 #ifdef __cplusplus
264 }
265 #endif
266 
267 #endif	/* _SYS_XDF_H */
268