1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #ifndef _VDC_H 28 #define _VDC_H 29 30 /* 31 * Virtual disk client implementation definitions 32 */ 33 34 #include <sys/sysmacros.h> 35 #include <sys/note.h> 36 37 #include <sys/ldc.h> 38 #include <sys/vio_mailbox.h> 39 #include <sys/vdsk_mailbox.h> 40 #include <sys/vdsk_common.h> 41 42 #ifdef __cplusplus 43 extern "C" { 44 #endif 45 46 #define VDC_DRIVER_NAME "vdc" 47 48 /* 49 * Bit-field values to indicate if parts of the vdc driver are initialised. 50 */ 51 #define VDC_SOFT_STATE 0x0001 52 #define VDC_LOCKS 0x0002 53 #define VDC_MINOR 0x0004 54 #define VDC_THREAD 0x0008 55 #define VDC_DRING_INIT 0x0010 /* The DRing was created */ 56 #define VDC_DRING_BOUND 0x0020 /* The DRing was bound to an LDC channel */ 57 #define VDC_DRING_LOCAL 0x0040 /* The local private DRing was allocated */ 58 #define VDC_DRING_ENTRY 0x0080 /* At least one DRing entry was initialised */ 59 #define VDC_DRING (VDC_DRING_INIT | VDC_DRING_BOUND | \ 60 VDC_DRING_LOCAL | VDC_DRING_ENTRY) 61 #define VDC_HANDSHAKE 0x0100 /* Indicates if a handshake is in progress */ 62 #define VDC_HANDSHAKE_STOP 0x0200 /* stop further handshakes */ 63 64 /* 65 * Definitions of MD nodes/properties. 66 */ 67 #define VDC_MD_CHAN_NAME "channel-endpoint" 68 #define VDC_MD_VDEV_NAME "virtual-device" 69 #define VDC_MD_PORT_NAME "virtual-device-port" 70 #define VDC_MD_DISK_NAME "disk" 71 #define VDC_MD_CFG_HDL "cfg-handle" 72 #define VDC_MD_TIMEOUT "vdc-timeout" 73 #define VDC_MD_ID "id" 74 75 /* 76 * Definition of actions to be carried out when processing the sequence ID 77 * of a message received from the vDisk server. The function verifying the 78 * sequence number checks the 'seq_num_xxx' fields in the soft state and 79 * returns whether the message should be processed (VDC_SEQ_NUM_TODO) or 80 * whether it was it was previously processed (VDC_SEQ_NUM_SKIP). 81 */ 82 #define VDC_SEQ_NUM_INVALID -1 /* Error */ 83 #define VDC_SEQ_NUM_SKIP 0 /* Request already processed */ 84 #define VDC_SEQ_NUM_TODO 1 /* Request needs processing */ 85 86 /* 87 * Macros to get UNIT and PART number 88 */ 89 #define VDCUNIT_SHIFT 3 90 #define VDCPART_MASK 7 91 92 #define VDCUNIT(dev) (getminor((dev)) >> VDCUNIT_SHIFT) 93 #define VDCPART(dev) (getminor((dev)) & VDCPART_MASK) 94 95 /* 96 * Scheme to store the instance number and the slice number in the minor number. 97 * (NOTE: Uses the same format and definitions as the sd(7D) driver) 98 */ 99 #define VD_MAKE_DEV(instance, minor) ((instance << VDCUNIT_SHIFT) | minor) 100 101 /* 102 * variables controlling how long to wait before timing out and how many 103 * retries to attempt before giving up when communicating with vds. 104 * 105 * These values need to be sufficiently large so that a guest can survive 106 * the reboot of the service domain. 107 */ 108 #define VDC_RETRIES 10 109 110 #define VDC_USEC_TIMEOUT_MIN (30 * MICROSEC) /* 30 sec */ 111 112 /* 113 * This macro returns the number of Hz that the vdc driver should wait before 114 * a timeout is triggered. The 'timeout' parameter specifiecs the wait 115 * time in Hz. The 'mul' parameter allows for a multiplier to be 116 * specified allowing for a backoff to be implemented (e.g. using the 117 * retry number as a multiplier) where the wait time will get longer if 118 * there is no response on the previous retry. 119 */ 120 #define VD_GET_TIMEOUT_HZ(timeout, mul) \ 121 (ddi_get_lbolt() + ((timeout) * MAX(1, (mul)))) 122 123 /* 124 * Macros to manipulate Descriptor Ring variables in the soft state 125 * structure. 126 */ 127 #define VDC_GET_NEXT_REQ_ID(vdc) ((vdc)->req_id++) 128 129 #define VDC_GET_DRING_ENTRY_PTR(vdc, idx) \ 130 (vd_dring_entry_t *)(uintptr_t)((vdc)->dring_mem_info.vaddr + \ 131 (idx * (vdc)->dring_entry_size)) 132 133 #define VDC_MARK_DRING_ENTRY_FREE(vdc, idx) \ 134 { \ 135 vd_dring_entry_t *dep = NULL; \ 136 ASSERT(vdc != NULL); \ 137 ASSERT(idx < vdc->dring_len); \ 138 ASSERT(vdc->dring_mem_info.vaddr != NULL); \ 139 dep = (vd_dring_entry_t *)(uintptr_t) \ 140 (vdc->dring_mem_info.vaddr + \ 141 (idx * vdc->dring_entry_size)); \ 142 ASSERT(dep != NULL); \ 143 dep->hdr.dstate = VIO_DESC_FREE; \ 144 } 145 146 /* Initialise the Session ID and Sequence Num in the DRing msg */ 147 #define VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdc) \ 148 ASSERT(vdc != NULL); \ 149 dmsg.tag.vio_sid = vdc->session_id; \ 150 dmsg.seq_num = vdc->seq_num; 151 152 /* 153 * The states that the read thread can be in. 154 */ 155 typedef enum vdc_rd_state { 156 VDC_READ_IDLE, /* idling - conn is not up */ 157 VDC_READ_WAITING, /* waiting for data */ 158 VDC_READ_PENDING, /* pending data avail for read */ 159 VDC_READ_RESET /* channel was reset - stop reads */ 160 } vdc_rd_state_t; 161 162 /* 163 * The states that the vdc-vds connection can be in. 164 */ 165 typedef enum vdc_state { 166 VDC_STATE_INIT, /* device is initialized */ 167 VDC_STATE_INIT_WAITING, /* waiting for ldc connection */ 168 VDC_STATE_NEGOTIATE, /* doing handshake negotiation */ 169 VDC_STATE_HANDLE_PENDING, /* handle requests in backup dring */ 170 VDC_STATE_RUNNING, /* running and accepting requests */ 171 VDC_STATE_DETACH, /* detaching */ 172 VDC_STATE_RESETTING /* resetting connection with vds */ 173 } vdc_state_t; 174 175 /* 176 * The states that the vdc instance can be in. 177 */ 178 typedef enum vdc_lc_state { 179 VDC_LC_ATTACHING, /* driver is attaching */ 180 VDC_LC_ONLINE, /* driver is attached and online */ 181 VDC_LC_DETACHING /* driver is detaching */ 182 } vdc_lc_state_t; 183 184 /* 185 * Local Descriptor Ring entry 186 * 187 * vdc creates a Local (private) descriptor ring the same size as the 188 * public descriptor ring it exports to vds. 189 */ 190 191 typedef enum { 192 VIO_read_dir, /* read data from server */ 193 VIO_write_dir, /* write data to server */ 194 VIO_both_dir /* transfer both in and out in same buffer */ 195 } vio_desc_direction_t; 196 197 typedef enum { 198 CB_STRATEGY, /* non-blocking strategy call */ 199 CB_SYNC /* synchronous operation */ 200 } vio_cb_type_t; 201 202 typedef struct vdc_local_desc { 203 boolean_t is_free; /* local state - inuse or not */ 204 205 int operation; /* VD_OP_xxx to be performed */ 206 caddr_t addr; /* addr passed in by consumer */ 207 int slice; 208 diskaddr_t offset; /* disk offset */ 209 size_t nbytes; 210 vio_cb_type_t cb_type; /* operation type blk/nonblk */ 211 void *cb_arg; /* buf passed to strategy() */ 212 vio_desc_direction_t dir; /* direction of transfer */ 213 214 caddr_t align_addr; /* used if addr non-aligned */ 215 ldc_mem_handle_t desc_mhdl; /* Mem handle of buf */ 216 vd_dring_entry_t *dep; /* public Dring Entry Pointer */ 217 218 } vdc_local_desc_t; 219 220 /* 221 * I/O queue used by failfast 222 */ 223 typedef struct vdc_io { 224 struct vdc_io *vio_next; /* next pending I/O in the queue */ 225 struct buf *vio_buf; /* buf for CB_STRATEGY I/O */ 226 clock_t vio_qtime; /* time the I/O was queued */ 227 } vdc_io_t; 228 229 /* 230 * Per vDisk server channel states 231 */ 232 #define VDC_LDC_INIT 0x0001 233 #define VDC_LDC_CB 0x0002 234 #define VDC_LDC_OPEN 0x0004 235 #define VDC_LDC (VDC_LDC_INIT | VDC_LDC_CB | VDC_LDC_OPEN) 236 237 /* 238 * vDisk server information 239 */ 240 typedef struct vdc_server { 241 struct vdc_server *next; /* Next server */ 242 struct vdc *vdcp; /* Ptr to vdc struct */ 243 uint64_t id; /* Server port id */ 244 uint64_t state; /* Server state */ 245 uint64_t ldc_id; /* Server LDC id */ 246 ldc_handle_t ldc_handle; /* Server LDC handle */ 247 ldc_status_t ldc_state; /* Server LDC state */ 248 uint64_t ctimeout; /* conn tmout (secs) */ 249 } vdc_server_t; 250 251 /* 252 * vdc soft state structure 253 */ 254 typedef struct vdc { 255 256 kmutex_t lock; /* protects next 2 sections of vars */ 257 kcondvar_t running_cv; /* signal when upper layers can send */ 258 kcondvar_t initwait_cv; /* signal when ldc conn is up */ 259 kcondvar_t dring_free_cv; /* signal when desc is avail */ 260 kcondvar_t membind_cv; /* signal when mem can be bound */ 261 boolean_t self_reset; 262 263 int initialized; /* keeps track of what's init'ed */ 264 vdc_lc_state_t lifecycle; /* Current state of the vdc instance */ 265 266 int hshake_cnt; /* number of failed handshakes */ 267 uint8_t open[OTYPCNT]; /* mask of opened slices */ 268 uint8_t open_excl; /* mask of exclusively opened slices */ 269 ulong_t open_lyr[V_NUMPAR]; /* number of layered opens */ 270 int dkio_flush_pending; /* # outstanding DKIO flushes */ 271 int validate_pending; /* # outstanding validate request */ 272 vd_disk_label_t vdisk_label; /* label type of device/disk imported */ 273 struct extvtoc *vtoc; /* structure to store VTOC data */ 274 struct dk_geom *geom; /* structure to store geometry data */ 275 vd_slice_t slice[V_NUMPAR]; /* logical partitions */ 276 277 kthread_t *msg_proc_thr; /* main msg processing thread */ 278 279 kmutex_t read_lock; /* lock to protect read */ 280 kcondvar_t read_cv; /* cv to wait for READ events */ 281 vdc_rd_state_t read_state; /* current read state */ 282 283 uint32_t sync_op_cnt; /* num of active sync operations */ 284 boolean_t sync_op_pending; /* sync operation is pending */ 285 boolean_t sync_op_blocked; /* blocked waiting to do sync op */ 286 uint32_t sync_op_status; /* status of sync operation */ 287 kcondvar_t sync_pending_cv; /* cv wait for sync op to finish */ 288 kcondvar_t sync_blocked_cv; /* cv wait for other syncs to finish */ 289 290 uint64_t session_id; /* common ID sent with all messages */ 291 uint64_t seq_num; /* most recent sequence num generated */ 292 uint64_t seq_num_reply; /* Last seq num ACK/NACK'ed by vds */ 293 uint64_t req_id; /* Most recent Request ID generated */ 294 uint64_t req_id_proc; /* Last request ID processed by vdc */ 295 vdc_state_t state; /* Current disk client-server state */ 296 297 dev_info_t *dip; /* device info pointer */ 298 int instance; /* driver instance number */ 299 300 vio_ver_t ver; /* version number agreed with server */ 301 vd_disk_type_t vdisk_type; /* type of device/disk being imported */ 302 uint32_t vdisk_media; /* physical media type of vDisk */ 303 uint64_t vdisk_size; /* device size in blocks */ 304 uint64_t max_xfer_sz; /* maximum block size of a descriptor */ 305 uint64_t block_size; /* device block size used */ 306 uint64_t operations; /* bitmask of ops. server supports */ 307 struct dk_cinfo *cinfo; /* structure to store DKIOCINFO data */ 308 struct dk_minfo *minfo; /* structure for DKIOCGMEDIAINFO data */ 309 ddi_devid_t devid; /* device id */ 310 boolean_t ctimeout_reached; /* connection timeout has expired */ 311 312 /* 313 * The ownership fields are protected by the lock mutex. The 314 * ownership_lock mutex is used to serialize ownership operations; 315 * it should be acquired before the lock mutex. 316 */ 317 kmutex_t ownership_lock; /* serialize ownership ops */ 318 int ownership; /* ownership status flags */ 319 kthread_t *ownership_thread; /* ownership thread */ 320 kcondvar_t ownership_cv; /* cv for ownership update */ 321 322 /* 323 * The failfast fields are protected by the lock mutex. 324 */ 325 kthread_t *failfast_thread; /* failfast thread */ 326 clock_t failfast_interval; /* interval in microsecs */ 327 kcondvar_t failfast_cv; /* cv for failfast update */ 328 kcondvar_t failfast_io_cv; /* cv wait for I/O to finish */ 329 vdc_io_t *failfast_io_queue; /* failfast io queue */ 330 331 /* 332 * kstats used to store I/O statistics consumed by iostat(1M). 333 * These are protected by the lock mutex. 334 */ 335 kstat_t *io_stats; 336 kstat_t *err_stats; 337 338 ldc_dring_handle_t dring_hdl; /* dring handle */ 339 ldc_mem_info_t dring_mem_info; /* dring information */ 340 uint_t dring_curr_idx; /* current index */ 341 uint32_t dring_len; /* dring length */ 342 uint32_t dring_max_cookies; /* dring max cookies */ 343 uint32_t dring_cookie_count; /* num cookies */ 344 uint32_t dring_entry_size; /* descriptor size */ 345 ldc_mem_cookie_t *dring_cookie; /* dring cookies */ 346 uint64_t dring_ident; /* dring ident */ 347 348 uint64_t threads_pending; /* num of threads */ 349 350 vdc_local_desc_t *local_dring; /* local dring */ 351 vdc_local_desc_t *local_dring_backup; /* local dring backup */ 352 int local_dring_backup_tail; /* backup dring tail */ 353 int local_dring_backup_len; /* backup dring len */ 354 355 int num_servers; /* no. of servers */ 356 vdc_server_t *server_list; /* vdisk server list */ 357 vdc_server_t *curr_server; /* curr vdisk server */ 358 } vdc_t; 359 360 /* 361 * Ownership status flags 362 */ 363 #define VDC_OWNERSHIP_NONE 0x00 /* no ownership wanted */ 364 #define VDC_OWNERSHIP_WANTED 0x01 /* ownership is wanted */ 365 #define VDC_OWNERSHIP_GRANTED 0x02 /* ownership has been granted */ 366 #define VDC_OWNERSHIP_RESET 0x04 /* ownership has been reset */ 367 368 /* 369 * Reservation conflict panic message 370 */ 371 #define VDC_RESV_CONFLICT_FMT_STR "Reservation Conflict\nDisk: " 372 #define VDC_RESV_CONFLICT_FMT_LEN (sizeof (VDC_RESV_CONFLICT_FMT_STR)) 373 374 /* 375 * Debugging macros 376 */ 377 #ifdef DEBUG 378 extern int vdc_msglevel; 379 extern uint64_t vdc_matchinst; 380 381 #define DMSG(_vdc, err_level, format, ...) \ 382 do { \ 383 if (vdc_msglevel > err_level && \ 384 (vdc_matchinst & (1ull << (_vdc)->instance))) \ 385 cmn_err(CE_CONT, "?[%d,t@%p] %s: "format, \ 386 (_vdc)->instance, (void *)curthread, \ 387 __func__, __VA_ARGS__); \ 388 _NOTE(CONSTANTCONDITION) \ 389 } while (0); 390 391 #define DMSGX(err_level, format, ...) \ 392 do { \ 393 if (vdc_msglevel > err_level) \ 394 cmn_err(CE_CONT, "?%s: "format, __func__, __VA_ARGS__);\ 395 _NOTE(CONSTANTCONDITION) \ 396 } while (0); 397 398 #define VDC_DUMP_DRING_MSG(dmsgp) \ 399 DMSGX(0, "sq:%lu start:%d end:%d ident:%lu\n", \ 400 dmsgp->seq_num, dmsgp->start_idx, \ 401 dmsgp->end_idx, dmsgp->dring_ident); 402 403 #else /* !DEBUG */ 404 #define DMSG(err_level, ...) 405 #define DMSGX(err_level, format, ...) 406 #define VDC_DUMP_DRING_MSG(dmsgp) 407 408 #endif /* !DEBUG */ 409 410 #ifdef __cplusplus 411 } 412 #endif 413 414 #endif /* _VDC_H */ 415