1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 28 #ifndef _SYS_XDF_H 29 #define _SYS_XDF_H 30 31 #pragma ident "%Z%%M% %I% %E% SMI" 32 33 #ifdef __cplusplus 34 extern "C" { 35 #endif 36 37 38 #define BLKIF_RING_SIZE \ 39 __RING_SIZE((blkif_sring_t *)NULL, PAGESIZE) 40 #define BLKIF_X86_32_RING_SIZE \ 41 __RING_SIZE((blkif_x86_32_sring_t *)NULL, PAGESIZE) 42 #define BLKIF_X86_64_RING_SIZE \ 43 __RING_SIZE((blkif_x86_64_sring_t *)NULL, PAGESIZE) 44 45 /* 46 * VBDs have standard 512 byte blocks 47 * A single blkif_request can transfer up to 11 pages of data, 1 page/segment 48 */ 49 #define XB_BSIZE DEV_BSIZE 50 #define XB_BMASK (XB_BSIZE - 1) 51 #define XB_BSHIFT 9 52 #define XB_DTOB(bn) ((bn) << XB_BSHIFT) 53 54 #define XB_MAX_SEGLEN (8 * XB_BSIZE) 55 #define XB_SEGOFFSET (XB_MAX_SEGLEN - 1) 56 #define XB_MAX_XFER (XB_MAX_SEGLEN * BLKIF_MAX_SEGMENTS_PER_REQUEST) 57 #define XB_MAXPHYS (XB_MAX_XFER * BLKIF_RING_SIZE) 58 59 60 /* 61 * Slice for absolute disk transaction. 62 * 63 * Hack Alert. XB_SLICE_NONE is a magic value that can be written into the 64 * b_private field of buf structures passed to xdf_strategy(). When present 65 * it indicates that the I/O is using an absolute offset. (ie, the I/O is 66 * not bound to any one partition.) This magic value is currently used by 67 * the pv_cmdk driver. This hack is shamelessly stolen from the sun4v vdc 68 * driver, another virtual disk device driver. (Although in the case of 69 * vdc the hack is less egregious since it is self contained within the 70 * vdc driver, where as here it is used as an interface between the pv_cmdk 71 * driver and the xdf driver.) 72 */ 73 #define XB_SLICE_NONE 0xFF 74 75 /* 76 * blkif status 77 */ 78 enum xdf_state { 79 /* 80 * initial state 81 */ 82 XD_UNKNOWN, 83 /* 84 * ring and evtchn alloced, xenbus state changed to 85 * XenbusStateInitialised, wait for backend to connect 86 */ 87 XD_INIT, 88 /* 89 * backend's xenbus state has changed to XenbusStateConnected, 90 * this is the only state allowing I/Os 91 */ 92 XD_READY, 93 /* 94 * vbd interface close request received from backend, no more I/O 95 * requestis allowed to be put into ring buffer, while interrupt handler 96 * is allowed to run to finish any outstanding I/O request, disconnect 97 * process is kicked off by changing xenbus state to XenbusStateClosed 98 */ 99 XD_CLOSING, 100 /* 101 * disconnection process finished, both backend and frontend's 102 * xenbus state has been changed to XenbusStateClosed, can be detached 103 */ 104 XD_CLOSED, 105 /* 106 * disconnection process finished, frontend is suspended 107 */ 108 XD_SUSPEND 109 }; 110 111 /* 112 * 16 partitions + fdisk 113 */ 114 #define XDF_PSHIFT 6 115 #define XDF_PMASK ((1 << XDF_PSHIFT) - 1) 116 #define XDF_PEXT (1 << XDF_PSHIFT) 117 #define XDF_MINOR(i, m) (((i) << XDF_PSHIFT) | (m)) 118 #define XDF_INST(m) ((m) >> XDF_PSHIFT) 119 #define XDF_PART(m) ((m) & XDF_PMASK) 120 121 /* 122 * one blkif_request_t will have one corresponding ge_slot_t 123 * where we save those grant table refs used in this blkif_request_t 124 * 125 * the id of this ge_slot_t will also be put into 'id' field in 126 * each blkif_request_t when sent out to the ring buffer. 127 */ 128 typedef struct ge_slot { 129 list_node_t link; 130 domid_t oeid; 131 struct v_req *vreq; 132 int isread; 133 grant_ref_t ghead; 134 int ngrefs; 135 grant_ref_t ge[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 136 } ge_slot_t; 137 138 /* 139 * vbd I/O request 140 * 141 * An instance of this structure is bound to each buf passed to 142 * the driver's strategy by setting the pointer into bp->av_back. 143 * The id of this vreq will also be put into 'id' field in each 144 * blkif_request_t when sent out to the ring buffer for one DMA 145 * window of this buf. 146 * 147 * Vreq mainly contains DMA information for this buf. In one vreq/buf, 148 * there could be more than one DMA window, each of which will be 149 * mapped to one blkif_request_t/ge_slot_t. Ge_slot_t contains all grant 150 * table entry information for this buf. The ge_slot_t for current DMA 151 * window is pointed to by v_gs in vreq. 152 * 153 * So, grant table entries will only be alloc'ed when the DMA window is 154 * about to be transferred via blkif_request_t to the ring buffer. And 155 * they will be freed right after the blkif_response_t is seen. By this 156 * means, we can make use of grant table entries more efficiently. 157 */ 158 typedef struct v_req { 159 list_node_t v_link; 160 int v_status; 161 buf_t *v_buf; 162 ddi_dma_handle_t v_dmahdl; 163 ddi_dma_cookie_t v_dmac; 164 uint_t v_ndmacs; 165 uint_t v_dmaw; 166 uint_t v_ndmaws; 167 uint_t v_nslots; 168 ge_slot_t *v_gs; 169 uint64_t v_blkno; 170 ddi_acc_handle_t v_align; 171 caddr_t v_abuf; 172 ddi_dma_handle_t v_memdmahdl; 173 uint8_t v_flush_diskcache; 174 } v_req_t; 175 176 /* 177 * Status set and checked in vreq->v_status by vreq_setup() 178 * 179 * These flags will help us to continue the vreq setup work from last failure 180 * point, instead of starting from scratch after each failure. 181 */ 182 #define VREQ_INIT 0x0 183 #define VREQ_INIT_DONE 0x1 184 #define VREQ_DMAHDL_ALLOCED 0x2 185 #define VREQ_MEMDMAHDL_ALLOCED 0x3 186 #define VREQ_DMAMEM_ALLOCED 0x4 187 #define VREQ_DMABUF_BOUND 0x5 188 #define VREQ_GS_ALLOCED 0x6 189 #define VREQ_DMAWIN_DONE 0x7 190 191 /* 192 * virtual block device per-instance softstate 193 */ 194 typedef struct xdf { 195 dev_info_t *xdf_dip; 196 ddi_iblock_cookie_t xdf_ibc; /* mutex iblock cookie */ 197 domid_t xdf_peer; /* otherend's dom ID */ 198 xendev_ring_t *xdf_xb_ring; /* I/O ring buffer */ 199 ddi_acc_handle_t xdf_xb_ring_hdl; /* access handler for ring buffer */ 200 list_t xdf_vreq_act; /* active vreq list */ 201 list_t xdf_gs_act; /* active grant table slot list */ 202 buf_t *xdf_f_act; /* active buf list head */ 203 buf_t *xdf_l_act; /* active buf list tail */ 204 enum xdf_state xdf_status; /* status of this virtual disk */ 205 ulong_t xdf_vd_open[OTYPCNT]; 206 ulong_t xdf_vd_lyropen[XDF_PEXT]; 207 ulong_t xdf_vd_exclopen; 208 kmutex_t xdf_iostat_lk; /* muxes lock for the iostat ptr */ 209 kmutex_t xdf_dev_lk; /* mutex lock for I/O path */ 210 kmutex_t xdf_cb_lk; /* mutex lock for event handling path */ 211 kcondvar_t xdf_dev_cv; /* cv used in I/O path */ 212 uint_t xdf_xdev_info; /* disk info from backend xenstore */ 213 diskaddr_t xdf_xdev_nblocks; /* total size in block */ 214 cmlb_geom_t xdf_pgeom; 215 kstat_t *xdf_xdev_iostat; 216 cmlb_handle_t xdf_vd_lbl; 217 ddi_softintr_t xdf_softintr_id; 218 timeout_id_t xdf_timeout_id; 219 struct gnttab_free_callback xdf_gnt_callback; 220 int xdf_feature_barrier; 221 int xdf_flush_supported; 222 int xdf_wce; 223 char *xdf_flush_mem; 224 char *xdf_cache_flush_block; 225 int xdf_evtchn; 226 #ifdef DEBUG 227 int xdf_dmacallback_num; 228 #endif 229 } xdf_t; 230 231 #define BP2VREQ(bp) ((v_req_t *)((bp)->av_back)) 232 233 /* 234 * VBD I/O requests must be aligned on a 512-byte boundary and specify 235 * a transfer size which is a mutiple of 512-bytes 236 */ 237 #define ALIGNED_XFER(bp) \ 238 ((((uintptr_t)((bp)->b_un.b_addr) & XB_BMASK) == 0) && \ 239 (((bp)->b_bcount & XB_BMASK) == 0)) 240 241 #define U_INVAL(u) (((u)->uio_loffset & (offset_t)(XB_BMASK)) || \ 242 ((u)->uio_iov->iov_len & (offset_t)(XB_BMASK))) 243 244 /* wrap pa_to_ma() for xdf to run in dom0 */ 245 #define PATOMA(addr) (DOMAIN_IS_INITDOMAIN(xen_info) ? addr : pa_to_ma(addr)) 246 247 #define XD_IS_RO(vbd) ((vbd)->xdf_xdev_info & VDISK_READONLY) 248 #define XD_IS_CD(vbd) ((vbd)->xdf_xdev_info & VDISK_CDROM) 249 #define XD_IS_RM(vbd) ((vbd)->xdf_xdev_info & VDISK_REMOVABLE) 250 #define IS_READ(bp) ((bp)->b_flags & B_READ) 251 #define IS_ERROR(bp) ((bp)->b_flags & B_ERROR) 252 253 #define XDF_UPDATE_IO_STAT(vdp, bp) \ 254 if ((vdp)->xdf_xdev_iostat != NULL) { \ 255 kstat_io_t *kip = KSTAT_IO_PTR((vdp)->xdf_xdev_iostat); \ 256 size_t n_done = (bp)->b_bcount - (bp)->b_resid; \ 257 if ((bp)->b_flags & B_READ) { \ 258 kip->reads++; \ 259 kip->nread += n_done; \ 260 } else { \ 261 kip->writes++; \ 262 kip->nwritten += n_done; \ 263 } \ 264 } 265 266 extern int xdfdebug; 267 #ifdef DEBUG 268 #define DPRINTF(flag, args) {if (xdfdebug & (flag)) prom_printf args; } 269 #define SETDMACBON(vbd) {(vbd)->xdf_dmacallback_num++; } 270 #define SETDMACBOFF(vbd) {(vbd)->xdf_dmacallback_num--; } 271 #define ISDMACBON(vbd) ((vbd)->xdf_dmacallback_num > 0) 272 #else 273 #define DPRINTF(flag, args) 274 #define SETDMACBON(vbd) 275 #define SETDMACBOFF(vbd) 276 #define ISDMACBON(vbd) 277 #endif /* DEBUG */ 278 279 #define DDI_DBG 0x1 280 #define DMA_DBG 0x2 281 #define INTR_DBG 0x8 282 #define IO_DBG 0x10 283 #define IOCTL_DBG 0x20 284 #define SUSRES_DBG 0x40 285 #define LBL_DBG 0x80 286 287 #if defined(XPV_HVM_DRIVER) 288 extern dev_info_t *xdf_hvm_hold(char *); 289 extern int xdf_hvm_connect(dev_info_t *); 290 extern int xdf_hvm_setpgeom(dev_info_t *, cmlb_geom_t *); 291 extern int xdf_kstat_create(dev_info_t *, char *, int); 292 extern void xdf_kstat_delete(dev_info_t *); 293 #endif /* XPV_HVM_DRIVER */ 294 295 #ifdef __cplusplus 296 } 297 #endif 298 299 #endif /* _SYS_XDF_H */ 300