1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 #ifndef _VDC_H 27 #define _VDC_H 28 29 /* 30 * Virtual disk client implementation definitions 31 */ 32 33 #include <sys/sysmacros.h> 34 #include <sys/note.h> 35 36 #include <sys/ldc.h> 37 #include <sys/vio_mailbox.h> 38 #include <sys/vdsk_mailbox.h> 39 #include <sys/vdsk_common.h> 40 41 #ifdef __cplusplus 42 extern "C" { 43 #endif 44 45 #define VDC_DRIVER_NAME "vdc" 46 47 /* 48 * Bit-field values to indicate if parts of the vdc driver are initialised. 49 */ 50 #define VDC_SOFT_STATE 0x0001 51 #define VDC_LOCKS 0x0002 52 #define VDC_MINOR 0x0004 53 #define VDC_THREAD 0x0008 54 #define VDC_DRING_INIT 0x0010 /* The DRing was created */ 55 #define VDC_DRING_BOUND 0x0020 /* The DRing was bound to an LDC channel */ 56 #define VDC_DRING_LOCAL 0x0040 /* The local private DRing was allocated */ 57 #define VDC_DRING_ENTRY 0x0080 /* At least one DRing entry was initialised */ 58 #define VDC_DRING (VDC_DRING_INIT | VDC_DRING_BOUND | \ 59 VDC_DRING_LOCAL | VDC_DRING_ENTRY) 60 #define VDC_HANDSHAKE 0x0100 /* Indicates if a handshake is in progress */ 61 #define VDC_HANDSHAKE_STOP 0x0200 /* stop further handshakes */ 62 63 /* 64 * Definitions of MD nodes/properties. 65 */ 66 #define VDC_MD_CHAN_NAME "channel-endpoint" 67 #define VDC_MD_VDEV_NAME "virtual-device" 68 #define VDC_MD_PORT_NAME "virtual-device-port" 69 #define VDC_MD_DISK_NAME "disk" 70 #define VDC_MD_CFG_HDL "cfg-handle" 71 #define VDC_MD_TIMEOUT "vdc-timeout" 72 #define VDC_MD_ID "id" 73 74 /* 75 * Definition of actions to be carried out when processing the sequence ID 76 * of a message received from the vDisk server. The function verifying the 77 * sequence number checks the 'seq_num_xxx' fields in the soft state and 78 * returns whether the message should be processed (VDC_SEQ_NUM_TODO) or 79 * whether it was it was previously processed (VDC_SEQ_NUM_SKIP). 80 */ 81 #define VDC_SEQ_NUM_INVALID -1 /* Error */ 82 #define VDC_SEQ_NUM_SKIP 0 /* Request already processed */ 83 #define VDC_SEQ_NUM_TODO 1 /* Request needs processing */ 84 85 /* 86 * DRing reserved entries. Entry 0 is reserved and only used for error 87 * checking. This is done so that error checking can be done even if the 88 * DRing is full. All other entries are available for regular I/Os. 89 */ 90 #define VDC_DRING_NUM_RESV 1 /* #reserved entries */ 91 #define VDC_DRING_FIRST_RESV 0 /* 1st reserved entry */ 92 #define VDC_DRING_FIRST_ENTRY \ 93 (VDC_DRING_FIRST_RESV + VDC_DRING_NUM_RESV) /* 1st non-resv entry */ 94 95 /* 96 * Flags for virtual disk operations. 97 */ 98 #define VDC_OP_STATE_RUNNING 0x01 /* do operation in running state */ 99 #define VDC_OP_ERRCHK_BACKEND 0x02 /* check backend on error */ 100 #define VDC_OP_ERRCHK_CONFLICT 0x04 /* check resv conflict on error */ 101 #define VDC_OP_DRING_RESERVED 0x08 /* use dring reserved entry */ 102 #define VDC_OP_RESUBMIT 0x10 /* I/O is being resubmitted */ 103 104 #define VDC_OP_ERRCHK (VDC_OP_ERRCHK_BACKEND | VDC_OP_ERRCHK_CONFLICT) 105 #define VDC_OP_NORMAL (VDC_OP_STATE_RUNNING | VDC_OP_ERRCHK) 106 107 /* 108 * Macros to get UNIT and PART number 109 */ 110 #define VDCUNIT_SHIFT 3 111 #define VDCPART_MASK 7 112 113 #define VDCUNIT(dev) (getminor((dev)) >> VDCUNIT_SHIFT) 114 #define VDCPART(dev) (getminor((dev)) & VDCPART_MASK) 115 116 /* 117 * Scheme to store the instance number and the slice number in the minor number. 118 * (NOTE: Uses the same format and definitions as the sd(4D) driver) 119 */ 120 #define VD_MAKE_DEV(instance, minor) ((instance << VDCUNIT_SHIFT) | minor) 121 122 #define VDC_EFI_DEV_SET(dev, vdsk, ioctl) \ 123 VDSK_EFI_DEV_SET(dev, vdsk, ioctl, \ 124 (vdsk)->vdisk_bsize, (vdsk)->vdisk_size) 125 126 /* max number of handshake retries per server */ 127 #define VDC_HSHAKE_RETRIES 3 128 129 /* minimum number of attribute negotiations before handshake failure */ 130 #define VDC_HATTR_MIN_INITIAL 3 131 #define VDC_HATTR_MIN 1 132 133 /* 134 * This macro returns the number of Hz that the vdc driver should wait before 135 * a timeout is triggered. The 'timeout' parameter specifiecs the wait 136 * time in Hz. The 'mul' parameter allows for a multiplier to be 137 * specified allowing for a backoff to be implemented (e.g. using the 138 * retry number as a multiplier) where the wait time will get longer if 139 * there is no response on the previous retry. 140 */ 141 #define VD_GET_TIMEOUT_HZ(timeout, mul) \ 142 (ddi_get_lbolt() + ((timeout) * MAX(1, (mul)))) 143 144 /* 145 * Macros to manipulate Descriptor Ring variables in the soft state 146 * structure. 147 */ 148 #define VDC_GET_NEXT_REQ_ID(vdc) ((vdc)->req_id++) 149 150 #define VDC_GET_DRING_ENTRY_PTR(vdc, idx) \ 151 (vd_dring_entry_t *)(uintptr_t)((vdc)->dring_mem_info.vaddr + \ 152 (idx * (vdc)->dring_entry_size)) 153 154 #define VDC_MARK_DRING_ENTRY_FREE(vdc, idx) \ 155 { \ 156 vd_dring_entry_t *dep = NULL; \ 157 ASSERT(vdc != NULL); \ 158 ASSERT(idx < vdc->dring_len); \ 159 ASSERT(vdc->dring_mem_info.vaddr != NULL); \ 160 dep = (vd_dring_entry_t *)(uintptr_t) \ 161 (vdc->dring_mem_info.vaddr + \ 162 (idx * vdc->dring_entry_size)); \ 163 ASSERT(dep != NULL); \ 164 dep->hdr.dstate = VIO_DESC_FREE; \ 165 } 166 167 /* Initialise the Session ID and Sequence Num in the DRing msg */ 168 #define VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdc) \ 169 ASSERT(vdc != NULL); \ 170 dmsg.tag.vio_sid = vdc->session_id; \ 171 dmsg.seq_num = vdc->seq_num; 172 173 /* 174 * The states that the read thread can be in. 175 */ 176 typedef enum vdc_rd_state { 177 VDC_READ_IDLE, /* idling - conn is not up */ 178 VDC_READ_WAITING, /* waiting for data */ 179 VDC_READ_PENDING, /* pending data avail for read */ 180 VDC_READ_RESET /* channel was reset - stop reads */ 181 } vdc_rd_state_t; 182 183 /* 184 * The states that the vdc-vds connection can be in. 185 */ 186 typedef enum vdc_state { 187 VDC_STATE_INIT, /* device is initialized */ 188 VDC_STATE_INIT_WAITING, /* waiting for ldc connection */ 189 VDC_STATE_NEGOTIATE, /* doing handshake negotiation */ 190 VDC_STATE_HANDLE_PENDING, /* handle requests in backup dring */ 191 VDC_STATE_FAULTED, /* multipath backend is inaccessible */ 192 VDC_STATE_FAILED, /* device is not usable */ 193 VDC_STATE_RUNNING, /* running and accepting requests */ 194 VDC_STATE_DETACH, /* detaching */ 195 VDC_STATE_RESETTING /* resetting connection with vds */ 196 } vdc_state_t; 197 198 /* 199 * States of the service provided by a vds server 200 */ 201 typedef enum vdc_service_state { 202 VDC_SERVICE_NONE = -1, /* no state define */ 203 VDC_SERVICE_OFFLINE, /* no connection with the service */ 204 VDC_SERVICE_CONNECTED, /* connection established */ 205 VDC_SERVICE_ONLINE, /* connection and backend available */ 206 VDC_SERVICE_FAILED, /* connection failed */ 207 VDC_SERVICE_FAULTED /* connection but backend unavailable */ 208 } vdc_service_state_t; 209 210 /* 211 * The states that the vdc instance can be in. 212 */ 213 typedef enum vdc_lc_state { 214 VDC_LC_ATTACHING, /* driver is attaching */ 215 VDC_LC_ONLINE_PENDING, /* driver is attached, handshake pending */ 216 VDC_LC_ONLINE, /* driver is attached and online */ 217 VDC_LC_DETACHING /* driver is detaching */ 218 } vdc_lc_state_t; 219 220 /* 221 * Local Descriptor Ring entry 222 * 223 * vdc creates a Local (private) descriptor ring the same size as the 224 * public descriptor ring it exports to vds. 225 */ 226 227 typedef enum { 228 VIO_read_dir, /* read data from server */ 229 VIO_write_dir, /* write data to server */ 230 VIO_both_dir /* transfer both in and out in same buffer */ 231 } vio_desc_direction_t; 232 233 typedef struct vdc_local_desc { 234 boolean_t is_free; /* local state - inuse or not */ 235 236 int operation; /* VD_OP_xxx to be performed */ 237 caddr_t addr; /* addr passed in by consumer */ 238 int slice; 239 diskaddr_t offset; /* disk offset */ 240 size_t nbytes; 241 struct buf *buf; /* buf of operation */ 242 vio_desc_direction_t dir; /* direction of transfer */ 243 int flags; /* flags of operation */ 244 245 caddr_t align_addr; /* used if addr non-aligned */ 246 ldc_mem_handle_t desc_mhdl; /* Mem handle of buf */ 247 vd_dring_entry_t *dep; /* public Dring Entry Pointer */ 248 249 } vdc_local_desc_t; 250 251 /* 252 * I/O queue used for checking backend or failfast 253 */ 254 typedef struct vdc_io { 255 struct vdc_io *vio_next; /* next pending I/O in the queue */ 256 int vio_index; /* descriptor index */ 257 clock_t vio_qtime; /* time the I/O was queued */ 258 } vdc_io_t; 259 260 /* 261 * Per vDisk server channel states 262 */ 263 #define VDC_LDC_INIT 0x0001 264 #define VDC_LDC_CB 0x0002 265 #define VDC_LDC_OPEN 0x0004 266 #define VDC_LDC (VDC_LDC_INIT | VDC_LDC_CB | VDC_LDC_OPEN) 267 268 /* 269 * vDisk server information 270 */ 271 typedef struct vdc_server { 272 struct vdc_server *next; /* Next server */ 273 struct vdc *vdcp; /* Ptr to vdc struct */ 274 uint64_t id; /* Server port id */ 275 uint64_t state; /* Server state */ 276 vdc_service_state_t svc_state; /* Service state */ 277 vdc_service_state_t log_state; /* Last state logged */ 278 uint64_t ldc_id; /* Server LDC id */ 279 ldc_handle_t ldc_handle; /* Server LDC handle */ 280 ldc_status_t ldc_state; /* Server LDC state */ 281 uint64_t ctimeout; /* conn tmout (secs) */ 282 uint_t hshake_cnt; /* handshakes count */ 283 uint_t hattr_cnt; /* attr. neg. count */ 284 uint_t hattr_total; /* attr. neg. total */ 285 } vdc_server_t; 286 287 /* 288 * vdc soft state structure 289 */ 290 typedef struct vdc { 291 292 kmutex_t lock; /* protects next 2 sections of vars */ 293 kcondvar_t running_cv; /* signal when upper layers can send */ 294 kcondvar_t initwait_cv; /* signal when ldc conn is up */ 295 kcondvar_t dring_free_cv; /* signal when desc is avail */ 296 kcondvar_t membind_cv; /* signal when mem can be bound */ 297 boolean_t self_reset; /* self initiated reset */ 298 kcondvar_t io_pending_cv; /* signal on pending I/O */ 299 boolean_t io_pending; /* pending I/O */ 300 301 int initialized; /* keeps track of what's init'ed */ 302 vdc_lc_state_t lifecycle; /* Current state of the vdc instance */ 303 uint_t hattr_min; /* min. # attribute negotiations */ 304 305 uint8_t open[OTYPCNT]; /* mask of opened slices */ 306 uint8_t open_excl; /* mask of exclusively opened slices */ 307 ulong_t open_lyr[V_NUMPAR]; /* number of layered opens */ 308 int dkio_flush_pending; /* # outstanding DKIO flushes */ 309 int validate_pending; /* # outstanding validate request */ 310 vd_disk_label_t vdisk_label; /* label type of device/disk imported */ 311 struct extvtoc *vtoc; /* structure to store VTOC data */ 312 struct dk_geom *geom; /* structure to store geometry data */ 313 vd_slice_t slice[V_NUMPAR]; /* logical partitions */ 314 315 kthread_t *msg_proc_thr; /* main msg processing thread */ 316 317 kmutex_t read_lock; /* lock to protect read */ 318 kcondvar_t read_cv; /* cv to wait for READ events */ 319 vdc_rd_state_t read_state; /* current read state */ 320 321 uint32_t sync_op_cnt; /* num of active sync operations */ 322 boolean_t sync_op_blocked; /* blocked waiting to do sync op */ 323 kcondvar_t sync_blocked_cv; /* cv wait for other syncs to finish */ 324 325 uint64_t session_id; /* common ID sent with all messages */ 326 uint64_t seq_num; /* most recent sequence num generated */ 327 uint64_t seq_num_reply; /* Last seq num ACK/NACK'ed by vds */ 328 uint64_t req_id; /* Most recent Request ID generated */ 329 uint64_t req_id_proc; /* Last request ID processed by vdc */ 330 vdc_state_t state; /* Current disk client-server state */ 331 332 dev_info_t *dip; /* device info pointer */ 333 int instance; /* driver instance number */ 334 335 vio_ver_t ver; /* version number agreed with server */ 336 vd_disk_type_t vdisk_type; /* type of device/disk being imported */ 337 uint32_t vdisk_media; /* physical media type of vDisk */ 338 uint64_t vdisk_size; /* device size in blocks */ 339 uint64_t max_xfer_sz; /* maximum block size of a descriptor */ 340 uint64_t vdisk_bsize; /* blk size for the virtual disk */ 341 uint32_t vio_bmask; /* mask to check vio blk alignment */ 342 int vio_bshift; /* shift for vio blk conversion */ 343 uint64_t operations; /* bitmask of ops. server supports */ 344 struct dk_cinfo *cinfo; /* structure to store DKIOCINFO data */ 345 struct dk_minfo *minfo; /* structure for DKIOCGMEDIAINFO data */ 346 ddi_devid_t devid; /* device id */ 347 boolean_t ctimeout_reached; /* connection timeout has expired */ 348 349 /* 350 * The ownership fields are protected by the lock mutex. The 351 * ownership_lock mutex is used to serialize ownership operations; 352 * it should be acquired before the lock mutex. 353 */ 354 kmutex_t ownership_lock; /* serialize ownership ops */ 355 int ownership; /* ownership status flags */ 356 kthread_t *ownership_thread; /* ownership thread */ 357 kcondvar_t ownership_cv; /* cv for ownership update */ 358 359 /* 360 * The eio and failfast fields are protected by the lock mutex. 361 */ 362 kthread_t *eio_thread; /* error io thread */ 363 kcondvar_t eio_cv; /* cv for eio thread update */ 364 vdc_io_t *eio_queue; /* error io queue */ 365 clock_t failfast_interval; /* interval in microsecs */ 366 367 /* 368 * kstats used to store I/O statistics consumed by iostat(8). 369 * These are protected by the lock mutex. 370 */ 371 kstat_t *io_stats; 372 kstat_t *err_stats; 373 374 ldc_dring_handle_t dring_hdl; /* dring handle */ 375 ldc_mem_info_t dring_mem_info; /* dring information */ 376 uint_t dring_curr_idx; /* current index */ 377 uint32_t dring_len; /* dring length */ 378 uint32_t dring_max_cookies; /* dring max cookies */ 379 uint32_t dring_cookie_count; /* num cookies */ 380 uint32_t dring_entry_size; /* descriptor size */ 381 ldc_mem_cookie_t *dring_cookie; /* dring cookies */ 382 uint64_t dring_ident; /* dring ident */ 383 384 uint64_t threads_pending; /* num of threads */ 385 386 vdc_local_desc_t *local_dring; /* local dring */ 387 vdc_local_desc_t *local_dring_backup; /* local dring backup */ 388 int local_dring_backup_tail; /* backup dring tail */ 389 int local_dring_backup_len; /* backup dring len */ 390 391 int num_servers; /* no. of servers */ 392 vdc_server_t *server_list; /* vdisk server list */ 393 vdc_server_t *curr_server; /* curr vdisk server */ 394 } vdc_t; 395 396 /* 397 * Ownership status flags 398 */ 399 #define VDC_OWNERSHIP_NONE 0x00 /* no ownership wanted */ 400 #define VDC_OWNERSHIP_WANTED 0x01 /* ownership is wanted */ 401 #define VDC_OWNERSHIP_GRANTED 0x02 /* ownership has been granted */ 402 #define VDC_OWNERSHIP_RESET 0x04 /* ownership has been reset */ 403 404 /* 405 * Reservation conflict panic message 406 */ 407 #define VDC_RESV_CONFLICT_FMT_STR "Reservation Conflict\nDisk: " 408 #define VDC_RESV_CONFLICT_FMT_LEN (sizeof (VDC_RESV_CONFLICT_FMT_STR)) 409 410 /* 411 * Debugging macros 412 */ 413 #ifdef DEBUG 414 extern int vdc_msglevel; 415 extern uint64_t vdc_matchinst; 416 417 #define DMSG(_vdc, err_level, format, ...) \ 418 do { \ 419 if (vdc_msglevel > err_level && \ 420 (vdc_matchinst & (1ull << (_vdc)->instance))) \ 421 cmn_err(CE_CONT, "?[%d,t@%p] %s: "format, \ 422 (_vdc)->instance, (void *)curthread, \ 423 __func__, __VA_ARGS__); \ 424 _NOTE(CONSTANTCONDITION) \ 425 } while (0); 426 427 #define DMSGX(err_level, format, ...) \ 428 do { \ 429 if (vdc_msglevel > err_level) \ 430 cmn_err(CE_CONT, "?%s: "format, __func__, __VA_ARGS__);\ 431 _NOTE(CONSTANTCONDITION) \ 432 } while (0); 433 434 #define VDC_DUMP_DRING_MSG(dmsgp) \ 435 DMSGX(0, "sq:%lu start:%d end:%d ident:%lu\n", \ 436 dmsgp->seq_num, dmsgp->start_idx, \ 437 dmsgp->end_idx, dmsgp->dring_ident); 438 439 #else /* !DEBUG */ 440 #define DMSG(err_level, ...) 441 #define DMSGX(err_level, format, ...) 442 #define VDC_DUMP_DRING_MSG(dmsgp) 443 444 #endif /* !DEBUG */ 445 446 #ifdef __cplusplus 447 } 448 #endif 449 450 #endif /* _VDC_H */ 451