1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #ifndef _VDC_H 28 #define _VDC_H 29 30 /* 31 * Virtual disk client implementation definitions 32 */ 33 34 #include <sys/sysmacros.h> 35 #include <sys/note.h> 36 37 #include <sys/ldc.h> 38 #include <sys/vio_mailbox.h> 39 #include <sys/vdsk_mailbox.h> 40 #include <sys/vdsk_common.h> 41 42 #ifdef __cplusplus 43 extern "C" { 44 #endif 45 46 #define VDC_DRIVER_NAME "vdc" 47 48 /* 49 * Bit-field values to indicate if parts of the vdc driver are initialised. 50 */ 51 #define VDC_SOFT_STATE 0x0001 52 #define VDC_LOCKS 0x0002 53 #define VDC_MINOR 0x0004 54 #define VDC_THREAD 0x0008 55 #define VDC_DRING_INIT 0x0010 /* The DRing was created */ 56 #define VDC_DRING_BOUND 0x0020 /* The DRing was bound to an LDC channel */ 57 #define VDC_DRING_LOCAL 0x0040 /* The local private DRing was allocated */ 58 #define VDC_DRING_ENTRY 0x0080 /* At least one DRing entry was initialised */ 59 #define VDC_DRING (VDC_DRING_INIT | VDC_DRING_BOUND | \ 60 VDC_DRING_LOCAL | VDC_DRING_ENTRY) 61 #define VDC_HANDSHAKE 0x0100 /* Indicates if a handshake is in progress */ 62 #define VDC_HANDSHAKE_STOP 0x0200 /* stop further handshakes */ 63 64 /* 65 * Definitions of MD nodes/properties. 66 */ 67 #define VDC_MD_CHAN_NAME "channel-endpoint" 68 #define VDC_MD_VDEV_NAME "virtual-device" 69 #define VDC_MD_PORT_NAME "virtual-device-port" 70 #define VDC_MD_DISK_NAME "disk" 71 #define VDC_MD_CFG_HDL "cfg-handle" 72 #define VDC_MD_TIMEOUT "vdc-timeout" 73 #define VDC_MD_ID "id" 74 75 /* 76 * Definition of actions to be carried out when processing the sequence ID 77 * of a message received from the vDisk server. The function verifying the 78 * sequence number checks the 'seq_num_xxx' fields in the soft state and 79 * returns whether the message should be processed (VDC_SEQ_NUM_TODO) or 80 * whether it was it was previously processed (VDC_SEQ_NUM_SKIP). 81 */ 82 #define VDC_SEQ_NUM_INVALID -1 /* Error */ 83 #define VDC_SEQ_NUM_SKIP 0 /* Request already processed */ 84 #define VDC_SEQ_NUM_TODO 1 /* Request needs processing */ 85 86 /* 87 * Macros to get UNIT and PART number 88 */ 89 #define VDCUNIT_SHIFT 3 90 #define VDCPART_MASK 7 91 92 #define VDCUNIT(dev) (getminor((dev)) >> VDCUNIT_SHIFT) 93 #define VDCPART(dev) (getminor((dev)) & VDCPART_MASK) 94 95 /* 96 * Scheme to store the instance number and the slice number in the minor number. 97 * (NOTE: Uses the same format and definitions as the sd(7D) driver) 98 */ 99 #define VD_MAKE_DEV(instance, minor) ((instance << VDCUNIT_SHIFT) | minor) 100 101 #define VDC_EFI_DEV_SET(dev, vdsk, ioctl) \ 102 VDSK_EFI_DEV_SET(dev, vdsk, ioctl, \ 103 (vdsk)->vdisk_bsize, (vdsk)->vdisk_size) 104 105 /* 106 * variables controlling how long to wait before timing out and how many 107 * retries to attempt before giving up when communicating with vds. 108 * 109 * These values need to be sufficiently large so that a guest can survive 110 * the reboot of the service domain. 111 */ 112 #define VDC_RETRIES 10 113 114 #define VDC_USEC_TIMEOUT_MIN (30 * MICROSEC) /* 30 sec */ 115 116 /* 117 * This macro returns the number of Hz that the vdc driver should wait before 118 * a timeout is triggered. The 'timeout' parameter specifiecs the wait 119 * time in Hz. The 'mul' parameter allows for a multiplier to be 120 * specified allowing for a backoff to be implemented (e.g. using the 121 * retry number as a multiplier) where the wait time will get longer if 122 * there is no response on the previous retry. 123 */ 124 #define VD_GET_TIMEOUT_HZ(timeout, mul) \ 125 (ddi_get_lbolt() + ((timeout) * MAX(1, (mul)))) 126 127 /* 128 * Macros to manipulate Descriptor Ring variables in the soft state 129 * structure. 130 */ 131 #define VDC_GET_NEXT_REQ_ID(vdc) ((vdc)->req_id++) 132 133 #define VDC_GET_DRING_ENTRY_PTR(vdc, idx) \ 134 (vd_dring_entry_t *)(uintptr_t)((vdc)->dring_mem_info.vaddr + \ 135 (idx * (vdc)->dring_entry_size)) 136 137 #define VDC_MARK_DRING_ENTRY_FREE(vdc, idx) \ 138 { \ 139 vd_dring_entry_t *dep = NULL; \ 140 ASSERT(vdc != NULL); \ 141 ASSERT(idx < vdc->dring_len); \ 142 ASSERT(vdc->dring_mem_info.vaddr != NULL); \ 143 dep = (vd_dring_entry_t *)(uintptr_t) \ 144 (vdc->dring_mem_info.vaddr + \ 145 (idx * vdc->dring_entry_size)); \ 146 ASSERT(dep != NULL); \ 147 dep->hdr.dstate = VIO_DESC_FREE; \ 148 } 149 150 /* Initialise the Session ID and Sequence Num in the DRing msg */ 151 #define VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdc) \ 152 ASSERT(vdc != NULL); \ 153 dmsg.tag.vio_sid = vdc->session_id; \ 154 dmsg.seq_num = vdc->seq_num; 155 156 /* 157 * The states that the read thread can be in. 158 */ 159 typedef enum vdc_rd_state { 160 VDC_READ_IDLE, /* idling - conn is not up */ 161 VDC_READ_WAITING, /* waiting for data */ 162 VDC_READ_PENDING, /* pending data avail for read */ 163 VDC_READ_RESET /* channel was reset - stop reads */ 164 } vdc_rd_state_t; 165 166 /* 167 * The states that the vdc-vds connection can be in. 168 */ 169 typedef enum vdc_state { 170 VDC_STATE_INIT, /* device is initialized */ 171 VDC_STATE_INIT_WAITING, /* waiting for ldc connection */ 172 VDC_STATE_NEGOTIATE, /* doing handshake negotiation */ 173 VDC_STATE_HANDLE_PENDING, /* handle requests in backup dring */ 174 VDC_STATE_RUNNING, /* running and accepting requests */ 175 VDC_STATE_DETACH, /* detaching */ 176 VDC_STATE_RESETTING /* resetting connection with vds */ 177 } vdc_state_t; 178 179 /* 180 * The states that the vdc instance can be in. 181 */ 182 typedef enum vdc_lc_state { 183 VDC_LC_ATTACHING, /* driver is attaching */ 184 VDC_LC_ONLINE, /* driver is attached and online */ 185 VDC_LC_DETACHING /* driver is detaching */ 186 } vdc_lc_state_t; 187 188 /* 189 * Local Descriptor Ring entry 190 * 191 * vdc creates a Local (private) descriptor ring the same size as the 192 * public descriptor ring it exports to vds. 193 */ 194 195 typedef enum { 196 VIO_read_dir, /* read data from server */ 197 VIO_write_dir, /* write data to server */ 198 VIO_both_dir /* transfer both in and out in same buffer */ 199 } vio_desc_direction_t; 200 201 typedef enum { 202 CB_STRATEGY, /* non-blocking strategy call */ 203 CB_SYNC /* synchronous operation */ 204 } vio_cb_type_t; 205 206 typedef struct vdc_local_desc { 207 boolean_t is_free; /* local state - inuse or not */ 208 209 int operation; /* VD_OP_xxx to be performed */ 210 caddr_t addr; /* addr passed in by consumer */ 211 int slice; 212 diskaddr_t offset; /* disk offset */ 213 size_t nbytes; 214 vio_cb_type_t cb_type; /* operation type blk/nonblk */ 215 void *cb_arg; /* buf passed to strategy() */ 216 vio_desc_direction_t dir; /* direction of transfer */ 217 218 caddr_t align_addr; /* used if addr non-aligned */ 219 ldc_mem_handle_t desc_mhdl; /* Mem handle of buf */ 220 vd_dring_entry_t *dep; /* public Dring Entry Pointer */ 221 222 } vdc_local_desc_t; 223 224 /* 225 * I/O queue used by failfast 226 */ 227 typedef struct vdc_io { 228 struct vdc_io *vio_next; /* next pending I/O in the queue */ 229 struct buf *vio_buf; /* buf for CB_STRATEGY I/O */ 230 clock_t vio_qtime; /* time the I/O was queued */ 231 } vdc_io_t; 232 233 /* 234 * Per vDisk server channel states 235 */ 236 #define VDC_LDC_INIT 0x0001 237 #define VDC_LDC_CB 0x0002 238 #define VDC_LDC_OPEN 0x0004 239 #define VDC_LDC (VDC_LDC_INIT | VDC_LDC_CB | VDC_LDC_OPEN) 240 241 /* 242 * vDisk server information 243 */ 244 typedef struct vdc_server { 245 struct vdc_server *next; /* Next server */ 246 struct vdc *vdcp; /* Ptr to vdc struct */ 247 uint64_t id; /* Server port id */ 248 uint64_t state; /* Server state */ 249 uint64_t ldc_id; /* Server LDC id */ 250 ldc_handle_t ldc_handle; /* Server LDC handle */ 251 ldc_status_t ldc_state; /* Server LDC state */ 252 uint64_t ctimeout; /* conn tmout (secs) */ 253 } vdc_server_t; 254 255 /* 256 * vdc soft state structure 257 */ 258 typedef struct vdc { 259 260 kmutex_t lock; /* protects next 2 sections of vars */ 261 kcondvar_t running_cv; /* signal when upper layers can send */ 262 kcondvar_t initwait_cv; /* signal when ldc conn is up */ 263 kcondvar_t dring_free_cv; /* signal when desc is avail */ 264 kcondvar_t membind_cv; /* signal when mem can be bound */ 265 boolean_t self_reset; 266 267 int initialized; /* keeps track of what's init'ed */ 268 vdc_lc_state_t lifecycle; /* Current state of the vdc instance */ 269 270 int hshake_cnt; /* number of failed handshakes */ 271 uint8_t open[OTYPCNT]; /* mask of opened slices */ 272 uint8_t open_excl; /* mask of exclusively opened slices */ 273 ulong_t open_lyr[V_NUMPAR]; /* number of layered opens */ 274 int dkio_flush_pending; /* # outstanding DKIO flushes */ 275 int validate_pending; /* # outstanding validate request */ 276 vd_disk_label_t vdisk_label; /* label type of device/disk imported */ 277 struct extvtoc *vtoc; /* structure to store VTOC data */ 278 struct dk_geom *geom; /* structure to store geometry data */ 279 vd_slice_t slice[V_NUMPAR]; /* logical partitions */ 280 281 kthread_t *msg_proc_thr; /* main msg processing thread */ 282 283 kmutex_t read_lock; /* lock to protect read */ 284 kcondvar_t read_cv; /* cv to wait for READ events */ 285 vdc_rd_state_t read_state; /* current read state */ 286 287 uint32_t sync_op_cnt; /* num of active sync operations */ 288 boolean_t sync_op_pending; /* sync operation is pending */ 289 boolean_t sync_op_blocked; /* blocked waiting to do sync op */ 290 uint32_t sync_op_status; /* status of sync operation */ 291 kcondvar_t sync_pending_cv; /* cv wait for sync op to finish */ 292 kcondvar_t sync_blocked_cv; /* cv wait for other syncs to finish */ 293 294 uint64_t session_id; /* common ID sent with all messages */ 295 uint64_t seq_num; /* most recent sequence num generated */ 296 uint64_t seq_num_reply; /* Last seq num ACK/NACK'ed by vds */ 297 uint64_t req_id; /* Most recent Request ID generated */ 298 uint64_t req_id_proc; /* Last request ID processed by vdc */ 299 vdc_state_t state; /* Current disk client-server state */ 300 301 dev_info_t *dip; /* device info pointer */ 302 int instance; /* driver instance number */ 303 304 vio_ver_t ver; /* version number agreed with server */ 305 vd_disk_type_t vdisk_type; /* type of device/disk being imported */ 306 uint32_t vdisk_media; /* physical media type of vDisk */ 307 uint64_t vdisk_size; /* device size in blocks */ 308 uint64_t max_xfer_sz; /* maximum block size of a descriptor */ 309 uint64_t vdisk_bsize; /* blk size for the virtual disk */ 310 uint32_t vio_bmask; /* mask to check vio blk alignment */ 311 int vio_bshift; /* shift for vio blk conversion */ 312 uint64_t operations; /* bitmask of ops. server supports */ 313 struct dk_cinfo *cinfo; /* structure to store DKIOCINFO data */ 314 struct dk_minfo *minfo; /* structure for DKIOCGMEDIAINFO data */ 315 ddi_devid_t devid; /* device id */ 316 boolean_t ctimeout_reached; /* connection timeout has expired */ 317 318 /* 319 * The ownership fields are protected by the lock mutex. The 320 * ownership_lock mutex is used to serialize ownership operations; 321 * it should be acquired before the lock mutex. 322 */ 323 kmutex_t ownership_lock; /* serialize ownership ops */ 324 int ownership; /* ownership status flags */ 325 kthread_t *ownership_thread; /* ownership thread */ 326 kcondvar_t ownership_cv; /* cv for ownership update */ 327 328 /* 329 * The failfast fields are protected by the lock mutex. 330 */ 331 kthread_t *failfast_thread; /* failfast thread */ 332 clock_t failfast_interval; /* interval in microsecs */ 333 kcondvar_t failfast_cv; /* cv for failfast update */ 334 kcondvar_t failfast_io_cv; /* cv wait for I/O to finish */ 335 vdc_io_t *failfast_io_queue; /* failfast io queue */ 336 337 /* 338 * kstats used to store I/O statistics consumed by iostat(1M). 339 * These are protected by the lock mutex. 340 */ 341 kstat_t *io_stats; 342 kstat_t *err_stats; 343 344 ldc_dring_handle_t dring_hdl; /* dring handle */ 345 ldc_mem_info_t dring_mem_info; /* dring information */ 346 uint_t dring_curr_idx; /* current index */ 347 uint32_t dring_len; /* dring length */ 348 uint32_t dring_max_cookies; /* dring max cookies */ 349 uint32_t dring_cookie_count; /* num cookies */ 350 uint32_t dring_entry_size; /* descriptor size */ 351 ldc_mem_cookie_t *dring_cookie; /* dring cookies */ 352 uint64_t dring_ident; /* dring ident */ 353 354 uint64_t threads_pending; /* num of threads */ 355 356 vdc_local_desc_t *local_dring; /* local dring */ 357 vdc_local_desc_t *local_dring_backup; /* local dring backup */ 358 int local_dring_backup_tail; /* backup dring tail */ 359 int local_dring_backup_len; /* backup dring len */ 360 361 int num_servers; /* no. of servers */ 362 vdc_server_t *server_list; /* vdisk server list */ 363 vdc_server_t *curr_server; /* curr vdisk server */ 364 } vdc_t; 365 366 /* 367 * Ownership status flags 368 */ 369 #define VDC_OWNERSHIP_NONE 0x00 /* no ownership wanted */ 370 #define VDC_OWNERSHIP_WANTED 0x01 /* ownership is wanted */ 371 #define VDC_OWNERSHIP_GRANTED 0x02 /* ownership has been granted */ 372 #define VDC_OWNERSHIP_RESET 0x04 /* ownership has been reset */ 373 374 /* 375 * Reservation conflict panic message 376 */ 377 #define VDC_RESV_CONFLICT_FMT_STR "Reservation Conflict\nDisk: " 378 #define VDC_RESV_CONFLICT_FMT_LEN (sizeof (VDC_RESV_CONFLICT_FMT_STR)) 379 380 /* 381 * Debugging macros 382 */ 383 #ifdef DEBUG 384 extern int vdc_msglevel; 385 extern uint64_t vdc_matchinst; 386 387 #define DMSG(_vdc, err_level, format, ...) \ 388 do { \ 389 if (vdc_msglevel > err_level && \ 390 (vdc_matchinst & (1ull << (_vdc)->instance))) \ 391 cmn_err(CE_CONT, "?[%d,t@%p] %s: "format, \ 392 (_vdc)->instance, (void *)curthread, \ 393 __func__, __VA_ARGS__); \ 394 _NOTE(CONSTANTCONDITION) \ 395 } while (0); 396 397 #define DMSGX(err_level, format, ...) \ 398 do { \ 399 if (vdc_msglevel > err_level) \ 400 cmn_err(CE_CONT, "?%s: "format, __func__, __VA_ARGS__);\ 401 _NOTE(CONSTANTCONDITION) \ 402 } while (0); 403 404 #define VDC_DUMP_DRING_MSG(dmsgp) \ 405 DMSGX(0, "sq:%lu start:%d end:%d ident:%lu\n", \ 406 dmsgp->seq_num, dmsgp->start_idx, \ 407 dmsgp->end_idx, dmsgp->dring_ident); 408 409 #else /* !DEBUG */ 410 #define DMSG(err_level, ...) 411 #define DMSGX(err_level, format, ...) 412 #define VDC_DUMP_DRING_MSG(dmsgp) 413 414 #endif /* !DEBUG */ 415 416 #ifdef __cplusplus 417 } 418 #endif 419 420 #endif /* _VDC_H */ 421