1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #ifndef _VDC_H 28 #define _VDC_H 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 /* 33 * Virtual disk client implementation definitions 34 */ 35 36 #include <sys/sysmacros.h> 37 #include <sys/note.h> 38 39 #include <sys/ldc.h> 40 #include <sys/vio_mailbox.h> 41 #include <sys/vdsk_mailbox.h> 42 #include <sys/vdsk_common.h> 43 44 #ifdef __cplusplus 45 extern "C" { 46 #endif 47 48 #define VDC_DRIVER_NAME "vdc" 49 50 /* 51 * Bit-field values to indicate if parts of the vdc driver are initialised. 52 */ 53 #define VDC_SOFT_STATE 0x0001 54 #define VDC_LOCKS 0x0002 55 #define VDC_MINOR 0x0004 56 #define VDC_THREAD 0x0008 57 #define VDC_LDC 0x0010 58 #define VDC_LDC_INIT 0x0020 59 #define VDC_LDC_CB 0x0040 60 #define VDC_LDC_OPEN 0x0080 61 #define VDC_DRING_INIT 0x0100 /* The DRing was created */ 62 #define VDC_DRING_BOUND 0x0200 /* The DRing was bound to an LDC channel */ 63 #define VDC_DRING_LOCAL 0x0400 /* The local private DRing was allocated */ 64 #define VDC_DRING_ENTRY 0x0800 /* At least one DRing entry was initialised */ 65 #define VDC_DRING (VDC_DRING_INIT | VDC_DRING_BOUND | \ 66 VDC_DRING_LOCAL | VDC_DRING_ENTRY) 67 #define VDC_HANDSHAKE 0x1000 /* Indicates if a handshake is in progress */ 68 #define VDC_HANDSHAKE_STOP 0x2000 /* stop further handshakes */ 69 70 /* 71 * Definitions of strings to be used to create device node properties. 72 * (vdc uses the capitalised versions of these properties as they are 64-bit) 73 */ 74 #define VDC_NBLOCKS_PROP_NAME "Nblocks" 75 #define VDC_SIZE_PROP_NAME "Size" 76 77 /* 78 * Definitions of MD nodes/properties. 79 */ 80 #define VDC_MD_CHAN_NAME "channel-endpoint" 81 #define VDC_MD_VDEV_NAME "virtual-device" 82 #define VDC_MD_PORT_NAME "virtual-device-port" 83 #define VDC_MD_DISK_NAME "disk" 84 #define VDC_MD_CFG_HDL "cfg-handle" 85 #define VDC_MD_TIMEOUT "vdc-timeout" 86 #define VDC_MD_ID "id" 87 88 /* 89 * Definition of actions to be carried out when processing the sequence ID 90 * of a message received from the vDisk server. The function verifying the 91 * sequence number checks the 'seq_num_xxx' fields in the soft state and 92 * returns whether the message should be processed (VDC_SEQ_NUM_TODO) or 93 * whether it was it was previously processed (VDC_SEQ_NUM_SKIP). 94 */ 95 #define VDC_SEQ_NUM_INVALID -1 /* Error */ 96 #define VDC_SEQ_NUM_SKIP 0 /* Request already processed */ 97 #define VDC_SEQ_NUM_TODO 1 /* Request needs processing */ 98 99 /* 100 * Macros to get UNIT and PART number 101 */ 102 #define VDCUNIT_SHIFT 3 103 #define VDCPART_MASK 7 104 105 #define VDCUNIT(dev) (getminor((dev)) >> VDCUNIT_SHIFT) 106 #define VDCPART(dev) (getminor((dev)) & VDCPART_MASK) 107 108 /* 109 * Scheme to store the instance number and the slice number in the minor number. 110 * (NOTE: Uses the same format and definitions as the sd(7D) driver) 111 */ 112 #define VD_MAKE_DEV(instance, minor) ((instance << VDCUNIT_SHIFT) | minor) 113 114 /* 115 * variables controlling how long to wait before timing out and how many 116 * retries to attempt before giving up when communicating with vds. 117 * 118 * These values need to be sufficiently large so that a guest can survive 119 * the reboot of the service domain. 120 */ 121 #define VDC_RETRIES 10 122 123 #define VDC_USEC_TIMEOUT_MIN (30 * MICROSEC) /* 30 sec */ 124 125 /* 126 * This macro returns the number of Hz that the vdc driver should wait before 127 * a timeout is triggered. The 'timeout' parameter specifiecs the wait 128 * time in Hz. The 'mul' parameter allows for a multiplier to be 129 * specified allowing for a backoff to be implemented (e.g. using the 130 * retry number as a multiplier) where the wait time will get longer if 131 * there is no response on the previous retry. 132 */ 133 #define VD_GET_TIMEOUT_HZ(timeout, mul) \ 134 (ddi_get_lbolt() + ((timeout) * MAX(1, (mul)))) 135 136 /* 137 * Macros to manipulate Descriptor Ring variables in the soft state 138 * structure. 139 */ 140 #define VDC_GET_NEXT_REQ_ID(vdc) ((vdc)->req_id++) 141 142 #define VDC_GET_DRING_ENTRY_PTR(vdc, idx) \ 143 (vd_dring_entry_t *)((vdc)->dring_mem_info.vaddr + \ 144 (idx * (vdc)->dring_entry_size)) 145 146 #define VDC_MARK_DRING_ENTRY_FREE(vdc, idx) \ 147 { \ 148 vd_dring_entry_t *dep = NULL; \ 149 ASSERT(vdc != NULL); \ 150 ASSERT((idx >= 0) && (idx < vdc->dring_len)); \ 151 ASSERT(vdc->dring_mem_info.vaddr != NULL); \ 152 dep = (vd_dring_entry_t *)(vdc->dring_mem_info.vaddr + \ 153 (idx * vdc->dring_entry_size)); \ 154 ASSERT(dep != NULL); \ 155 dep->hdr.dstate = VIO_DESC_FREE; \ 156 } 157 158 /* Initialise the Session ID and Sequence Num in the DRing msg */ 159 #define VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdc) \ 160 ASSERT(vdc != NULL); \ 161 dmsg.tag.vio_sid = vdc->session_id; \ 162 dmsg.seq_num = vdc->seq_num; 163 164 /* 165 * The states that the read thread can be in. 166 */ 167 typedef enum vdc_rd_state { 168 VDC_READ_IDLE, /* idling - conn is not up */ 169 VDC_READ_WAITING, /* waiting for data */ 170 VDC_READ_PENDING, /* pending data avail for read */ 171 VDC_READ_RESET /* channel was reset - stop reads */ 172 } vdc_rd_state_t; 173 174 /* 175 * The states that the vdc-vds connection can be in. 176 */ 177 typedef enum vdc_state { 178 VDC_STATE_INIT, /* device is initialized */ 179 VDC_STATE_INIT_WAITING, /* waiting for ldc connection */ 180 VDC_STATE_NEGOTIATE, /* doing handshake negotiation */ 181 VDC_STATE_HANDLE_PENDING, /* handle requests in backup dring */ 182 VDC_STATE_RUNNING, /* running and accepting requests */ 183 VDC_STATE_DETACH, /* detaching */ 184 VDC_STATE_RESETTING /* resetting connection with vds */ 185 } vdc_state_t; 186 187 /* 188 * The states that the vdc instance can be in. 189 */ 190 typedef enum vdc_lc_state { 191 VDC_LC_ATTACHING, /* driver is attaching */ 192 VDC_LC_ONLINE, /* driver is attached and online */ 193 VDC_LC_DETACHING /* driver is detaching */ 194 } vdc_lc_state_t; 195 196 /* 197 * Local Descriptor Ring entry 198 * 199 * vdc creates a Local (private) descriptor ring the same size as the 200 * public descriptor ring it exports to vds. 201 */ 202 203 typedef enum { 204 VIO_read_dir, /* read data from server */ 205 VIO_write_dir, /* write data to server */ 206 VIO_both_dir /* transfer both in and out in same buffer */ 207 } vio_desc_direction_t; 208 209 typedef enum { 210 CB_STRATEGY, /* non-blocking strategy call */ 211 CB_SYNC /* synchronous operation */ 212 } vio_cb_type_t; 213 214 typedef struct vdc_local_desc { 215 boolean_t is_free; /* local state - inuse or not */ 216 217 int operation; /* VD_OP_xxx to be performed */ 218 caddr_t addr; /* addr passed in by consumer */ 219 int slice; 220 diskaddr_t offset; /* disk offset */ 221 size_t nbytes; 222 vio_cb_type_t cb_type; /* operation type blk/nonblk */ 223 void *cb_arg; /* buf passed to strategy() */ 224 vio_desc_direction_t dir; /* direction of transfer */ 225 226 caddr_t align_addr; /* used if addr non-aligned */ 227 ldc_mem_handle_t desc_mhdl; /* Mem handle of buf */ 228 vd_dring_entry_t *dep; /* public Dring Entry Pointer */ 229 230 } vdc_local_desc_t; 231 232 /* 233 * vdc soft state structure 234 */ 235 typedef struct vdc { 236 237 kmutex_t lock; /* protects next 2 sections of vars */ 238 kcondvar_t running_cv; /* signal when upper layers can send */ 239 kcondvar_t initwait_cv; /* signal when ldc conn is up */ 240 kcondvar_t dring_free_cv; /* signal when desc is avail */ 241 kcondvar_t membind_cv; /* signal when mem can be bound */ 242 boolean_t self_reset; 243 244 int initialized; /* keeps track of what's init'ed */ 245 vdc_lc_state_t lifecycle; /* Current state of the vdc instance */ 246 247 int hshake_cnt; /* number of failed handshakes */ 248 int open_count; /* count of outstanding opens */ 249 int dkio_flush_pending; /* # outstanding DKIO flushes */ 250 251 kthread_t *msg_proc_thr; /* main msg processing thread */ 252 253 kmutex_t read_lock; /* lock to protect read */ 254 kcondvar_t read_cv; /* cv to wait for READ events */ 255 vdc_rd_state_t read_state; /* current read state */ 256 257 uint32_t sync_op_cnt; /* num of active sync operations */ 258 boolean_t sync_op_pending; /* sync operation is pending */ 259 boolean_t sync_op_blocked; /* blocked waiting to do sync op */ 260 uint32_t sync_op_status; /* status of sync operation */ 261 kcondvar_t sync_pending_cv; /* cv wait for sync op to finish */ 262 kcondvar_t sync_blocked_cv; /* cv wait for other syncs to finish */ 263 264 uint64_t session_id; /* common ID sent with all messages */ 265 uint64_t seq_num; /* most recent sequence num generated */ 266 uint64_t seq_num_reply; /* Last seq num ACK/NACK'ed by vds */ 267 uint64_t req_id; /* Most recent Request ID generated */ 268 uint64_t req_id_proc; /* Last request ID processed by vdc */ 269 vdc_state_t state; /* Current disk client-server state */ 270 271 dev_info_t *dip; /* device info pointer */ 272 int instance; /* driver instance number */ 273 274 vio_ver_t ver; /* version number agreed with server */ 275 vd_disk_type_t vdisk_type; /* type of device/disk being imported */ 276 vd_disk_label_t vdisk_label; /* label type of device/disk imported */ 277 uint64_t vdisk_size; /* device size in blocks */ 278 uint64_t max_xfer_sz; /* maximum block size of a descriptor */ 279 uint64_t block_size; /* device block size used */ 280 struct dk_label *label; /* structure to store disk label */ 281 struct dk_cinfo *cinfo; /* structure to store DKIOCINFO data */ 282 struct dk_minfo *minfo; /* structure for DKIOCGMEDIAINFO data */ 283 struct vtoc *vtoc; /* structure to store VTOC data */ 284 ddi_devid_t devid; /* device id */ 285 uint64_t ctimeout; /* connection timeout in seconds */ 286 boolean_t ctimeout_reached; /* connection timeout has expired */ 287 288 ldc_mem_info_t dring_mem_info; /* dring information */ 289 uint_t dring_curr_idx; /* current index */ 290 uint32_t dring_len; /* dring length */ 291 uint32_t dring_max_cookies; /* dring max cookies */ 292 uint32_t dring_cookie_count; /* num cookies */ 293 uint32_t dring_entry_size; /* descriptor size */ 294 ldc_mem_cookie_t *dring_cookie; /* dring cookies */ 295 uint64_t dring_ident; /* dring ident */ 296 297 uint64_t threads_pending; /* num of threads */ 298 299 vdc_local_desc_t *local_dring; /* local dring */ 300 vdc_local_desc_t *local_dring_backup; /* local dring backup */ 301 int local_dring_backup_tail; /* backup dring tail */ 302 int local_dring_backup_len; /* backup dring len */ 303 304 uint64_t ldc_id; /* LDC channel id */ 305 ldc_status_t ldc_state; /* LDC channel state */ 306 ldc_handle_t ldc_handle; /* LDC handle */ 307 ldc_dring_handle_t ldc_dring_hdl; /* LDC dring handle */ 308 } vdc_t; 309 310 /* 311 * Debugging macros 312 */ 313 #ifdef DEBUG 314 extern int vdc_msglevel; 315 extern uint64_t vdc_matchinst; 316 317 #define DMSG(_vdc, err_level, format, ...) \ 318 do { \ 319 if (vdc_msglevel > err_level && \ 320 (vdc_matchinst & (1ull << (_vdc)->instance))) \ 321 cmn_err(CE_CONT, "?[%d,t@%p] %s: "format, \ 322 (_vdc)->instance, (void *)curthread, \ 323 __func__, __VA_ARGS__); \ 324 _NOTE(CONSTANTCONDITION) \ 325 } while (0); 326 327 #define DMSGX(err_level, format, ...) \ 328 do { \ 329 if (vdc_msglevel > err_level) \ 330 cmn_err(CE_CONT, "?%s: "format, __func__, __VA_ARGS__);\ 331 _NOTE(CONSTANTCONDITION) \ 332 } while (0); 333 334 #define VDC_DUMP_DRING_MSG(dmsgp) \ 335 DMSGX(0, "sq:%lu start:%d end:%d ident:%lu\n", \ 336 dmsgp->seq_num, dmsgp->start_idx, \ 337 dmsgp->end_idx, dmsgp->dring_ident); 338 339 #else /* !DEBUG */ 340 #define DMSG(err_level, ...) 341 #define DMSGX(err_level, format, ...) 342 #define VDC_DUMP_DRING_MSG(dmsgp) 343 344 #endif /* !DEBUG */ 345 346 #ifdef __cplusplus 347 } 348 #endif 349 350 #endif /* _VDC_H */ 351