/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * LDoms virtual disk client (vdc) device driver
 *
 * This driver runs on a guest logical domain and communicates with the virtual
 * disk server (vds) driver running on the service domain which is exporting
 * virtualized "disks" to the guest logical domain.
 *
 * The driver can be divided into four sections:
 *
 * 1) generic device driver housekeeping
 *	_init, _fini, attach, detach, ops structures, etc.
 *
 * 2) communication channel setup
 *	Setup the communications link over the LDC channel that vdc uses to
 *	talk to the vDisk server. Initialise the descriptor ring which
 *	allows the LDC clients to transfer data via memory mappings.
 *
 * 3) Support exported to upper layers (filesystems, etc)
 *	The upper layers call into vdc via strategy(9E) and DKIO(7I)
 *	ioctl calls. vdc will copy the data to be written into the
 *	descriptor ring or map the buffer into which the vDisk server
 *	will store the data it reads. It then sends a message to the
 *	vDisk server requesting it to complete the operation.
 *
 * 4) Handling responses from vDisk server.
 *	The vDisk server will ACK some or all of the messages vdc sends to it
 *	(this is configured during the handshake). Upon receipt of an ACK,
 *	vdc will check the descriptor ring and signal to the upper layer
 *	code waiting on the IO.
 */

#include <sys/atomic.h>
#include <sys/conf.h>
#include <sys/disp.h>
#include <sys/ddi.h>
#include <sys/dkio.h>
#include <sys/efi_partition.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/kstat.h>
#include <sys/mach_descrip.h>
#include <sys/modctl.h>
#include <sys/mdeg.h>
#include <sys/note.h>
#include <sys/open.h>
#include <sys/sdt.h>
#include <sys/stat.h>
#include <sys/sunddi.h>
#include <sys/types.h>
#include <sys/promif.h>
#include <sys/var.h>
#include <sys/vtoc.h>
#include <sys/archsystm.h>
#include <sys/sysmacros.h>

#include <sys/cdio.h>
#include <sys/dktp/fdisk.h>
#include <sys/dktp/dadkio.h>
#include <sys/mhd.h>
#include <sys/scsi/generic/sense.h>
#include <sys/scsi/impl/uscsi.h>
#include <sys/scsi/impl/services.h>
#include <sys/scsi/targets/sddef.h>

#include <sys/ldoms.h>
#include <sys/ldc.h>
#include <sys/vio_common.h>
#include <sys/vio_mailbox.h>
#include <sys/vio_util.h>
#include <sys/vdsk_common.h>
#include <sys/vdsk_mailbox.h>
#include <sys/vdc.h>

/*
 * function prototypes
 */

/* standard driver functions */
static int	vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred);
static int	vdc_close(dev_t dev, int flag, int otyp, cred_t *cred);
static int	vdc_strategy(struct buf *buf);
static int	vdc_print(dev_t dev, char *str);
static int	vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk);
static int	vdc_read(dev_t dev, struct uio *uio, cred_t *cred);
static int	vdc_write(dev_t dev, struct uio *uio, cred_t *cred);
static int	vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
		    cred_t *credp, int *rvalp);
static int	vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred);
static int	vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred);

static int	vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd,
		    void *arg, void **resultp);
static int	vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd);
static int	vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd);

/* setup */
static void	vdc_min(struct buf *bufp);
static int	vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen);
static int	vdc_do_ldc_init(vdc_t *vdc, md_t *mdp, mde_cookie_t vd_node);
static int	vdc_start_ldc_connection(vdc_t *vdc);
static int	vdc_create_device_nodes(vdc_t *vdc);
static int	vdc_create_device_nodes_efi(vdc_t *vdc);
static int	vdc_create_device_nodes_vtoc(vdc_t *vdc);
static int	vdc_create_device_nodes_props(vdc_t *vdc);
static void	vdc_create_io_kstats(vdc_t *vdc);
static void	vdc_create_err_kstats(vdc_t *vdc);
static void	vdc_set_err_kstats(vdc_t *vdc);
static int	vdc_get_md_node(dev_info_t *dip, md_t **mdpp,
		    mde_cookie_t *vd_nodep, mde_cookie_t *vd_portp);
static int	vdc_get_ldc_id(md_t *, mde_cookie_t, uint64_t *);
static int	vdc_do_ldc_up(vdc_t *vdc);
static void	vdc_terminate_ldc(vdc_t *vdc);
static int	vdc_init_descriptor_ring(vdc_t *vdc);
static void	vdc_destroy_descriptor_ring(vdc_t *vdc);
static int	vdc_setup_devid(vdc_t *vdc);
static void	vdc_store_label_efi(vdc_t *, efi_gpt_t *, efi_gpe_t *);
static void	vdc_store_label_vtoc(vdc_t *, struct dk_geom *, struct vtoc *);
static void	vdc_store_label_unk(vdc_t *vdc);
static boolean_t vdc_is_opened(vdc_t *vdc);

/* handshake with vds */
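/*
 * The handshake runs through these stages in order: version negotiation,
 * attribute exchange, descriptor ring registration and, finally, the RDX
 * exchange that marks the connection ready for data transfers.
 */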
static int	vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver);
static int	vdc_ver_negotiation(vdc_t *vdcp);
static int	vdc_init_attr_negotiation(vdc_t *vdc);
static int	vdc_attr_negotiation(vdc_t *vdcp);
static int	vdc_init_dring_negotiate(vdc_t *vdc);
static int	vdc_dring_negotiation(vdc_t *vdcp);
static int	vdc_send_rdx(vdc_t *vdcp);
static int	vdc_rdx_exchange(vdc_t *vdcp);
static boolean_t vdc_is_supported_version(vio_ver_msg_t *ver_msg);

/* processing incoming messages from vDisk server */
static void	vdc_process_msg_thread(vdc_t *vdc);
static int	vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp);

static uint_t	vdc_handle_cb(uint64_t event, caddr_t arg);
static int	vdc_process_data_msg(vdc_t *vdc, vio_msg_t *msg);
static int	vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg);
static int	vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg);
static int	vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *msg);
static int	vdc_send_request(vdc_t *vdcp, int operation,
		    caddr_t addr, size_t nbytes, int slice, diskaddr_t offset,
		    int cb_type, void *cb_arg, vio_desc_direction_t dir);
static int	vdc_map_to_shared_dring(vdc_t *vdcp, int idx);
static int	vdc_populate_descriptor(vdc_t *vdcp, int operation,
		    caddr_t addr, size_t nbytes, int slice, diskaddr_t offset,
		    int cb_type, void *cb_arg, vio_desc_direction_t dir);
static int	vdc_do_sync_op(vdc_t *vdcp, int operation, caddr_t addr,
		    size_t nbytes, int slice, diskaddr_t offset, int cb_type,
		    void *cb_arg, vio_desc_direction_t dir, boolean_t);

static int	vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp);
static int	vdc_drain_response(vdc_t *vdcp);
static int	vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx);
static int	vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep);
static int	vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg);

/* dkio */
static int	vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode,
		    int *rvalp);
static int	vd_process_efi_ioctl(void *vdisk, int cmd, uintptr_t arg);
static void	vdc_create_fake_geometry(vdc_t *vdc);
static int	vdc_validate_geometry(vdc_t *vdc);
static void	vdc_validate(vdc_t *vdc);
static void	vdc_validate_task(void *arg);
static int	vdc_null_copy_func(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_get_wce_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_set_wce_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_get_geom_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_set_geom_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_get_efi_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);
static int	vdc_set_efi_convert(vdc_t *vdc, void *from, void *to,
		    int mode, int dir);

static void	vdc_ownership_update(vdc_t *vdc, int ownership_flags);
static int	vdc_access_set(vdc_t *vdc, uint64_t flags, int mode);
static vdc_io_t	*vdc_failfast_io_queue(vdc_t *vdc, struct buf *buf);
static int	vdc_failfast_check_resv(vdc_t *vdc);

/*
 * Module variables
 */

/*
 * Tunable variables to control how long vdc waits before timing out on
 * various operations
 */
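/*
 * Being module globals, these can also be tuned at boot time via
 * /etc/system, e.g.:
 *
 *	set vdc:vdc_timeout = 60
 */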
static int	vdc_hshake_retries = 3;

static int	vdc_timeout = 0;	/* units: seconds */

static uint64_t vdc_hz_min_ldc_delay;
static uint64_t vdc_min_timeout_ldc = 1 * MILLISEC;
static uint64_t vdc_hz_max_ldc_delay;
static uint64_t vdc_max_timeout_ldc = 100 * MILLISEC;

static uint64_t vdc_ldc_read_init_delay = 1 * MILLISEC;
static uint64_t vdc_ldc_read_max_delay = 100 * MILLISEC;

/* values for dumping - need to run in a tighter loop */
static uint64_t	vdc_usec_timeout_dump = 100 * MILLISEC;	/* 0.1s units: usec */
static int	vdc_dump_retries = 100;

static uint16_t	vdc_scsi_timeout = 60;	/* 60s units: seconds */

static uint64_t vdc_ownership_delay = 6 * MICROSEC;	/* 6s units: usec */

/* Count of the number of vdc instances attached */
static volatile uint32_t	vdc_instance_count = 0;

/* Tunable to log all SCSI errors */
static boolean_t vdc_scsi_log_error = B_FALSE;

/* Soft state pointer */
static void	*vdc_state;

/*
 * Controlling the verbosity of the error/debug messages
 *
 * vdc_msglevel - controls level of messages
 * vdc_matchinst - 64-bit variable where each bit corresponds
 *                 to the vdc instance the vdc_msglevel applies.
 */
int		vdc_msglevel = 0x0;
uint64_t	vdc_matchinst = 0ull;

/*
 * Supported vDisk protocol version pairs.
 *
 * The first array entry is the latest and preferred version.
 */
static const vio_ver_t	vdc_version[] = {{1, 1}};

static struct cb_ops vdc_cb_ops = {
	vdc_open,	/* cb_open */
	vdc_close,	/* cb_close */
	vdc_strategy,	/* cb_strategy */
	vdc_print,	/* cb_print */
	vdc_dump,	/* cb_dump */
	vdc_read,	/* cb_read */
	vdc_write,	/* cb_write */
	vdc_ioctl,	/* cb_ioctl */
	nodev,		/* cb_devmap */
	nodev,		/* cb_mmap */
	nodev,		/* cb_segmap */
	nochpoll,	/* cb_chpoll */
	ddi_prop_op,	/* cb_prop_op */
	NULL,		/* cb_str */
	D_MP | D_64BIT,	/* cb_flag */
	CB_REV,		/* cb_rev */
	vdc_aread,	/* cb_aread */
	vdc_awrite	/* cb_awrite */
};

static struct dev_ops vdc_ops = {
	DEVO_REV,	/* devo_rev */
	0,		/* devo_refcnt */
	vdc_getinfo,	/* devo_getinfo */
	nulldev,	/* devo_identify */
	nulldev,	/* devo_probe */
	vdc_attach,	/* devo_attach */
	vdc_detach,	/* devo_detach */
	nodev,		/* devo_reset */
	&vdc_cb_ops,	/* devo_cb_ops */
	NULL,		/* devo_bus_ops */
	nulldev		/* devo_power */
};

static struct modldrv modldrv = {
	&mod_driverops,
	"virtual disk client",
	&vdc_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1,
	&modldrv,
	NULL
};

/* -------------------------------------------------------------------------- */

/*
 * Device Driver housekeeping and setup
 */

int
_init(void)
{
	int	status;

	if ((status = ddi_soft_state_init(&vdc_state, sizeof (vdc_t), 1)) != 0)
		return (status);
	if ((status = mod_install(&modlinkage)) != 0)
		ddi_soft_state_fini(&vdc_state);
	return (status);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	int	status;

	if ((status = mod_remove(&modlinkage)) != 0)
		return (status);
	ddi_soft_state_fini(&vdc_state);
	return (0);
}

static int
vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
{
	_NOTE(ARGUNUSED(dip))

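	/*
	 * VDCUNIT() decodes the instance number that vdc encodes in the
	 * upper bits of a dev_t's minor number (the low bits hold the
	 * slice, see VDCPART()).
	 */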
	int	instance = VDCUNIT((dev_t)arg);
	vdc_t	*vdc = NULL;

	switch (cmd) {
	case DDI_INFO_DEVT2DEVINFO:
		if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
			*resultp = NULL;
			return (DDI_FAILURE);
		}
		*resultp = vdc->dip;
		return (DDI_SUCCESS);
	case DDI_INFO_DEVT2INSTANCE:
		*resultp = (void *)(uintptr_t)instance;
		return (DDI_SUCCESS);
	default:
		*resultp = NULL;
		return (DDI_FAILURE);
	}
}

static int
vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	kt_did_t	failfast_tid, ownership_tid;
	int		instance;
	int		rv;
	vdc_t		*vdc = NULL;

	switch (cmd) {
	case DDI_DETACH:
		/* the real work happens below */
		break;
	case DDI_SUSPEND:
		/* nothing to do for this non-device */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	ASSERT(cmd == DDI_DETACH);
	instance = ddi_get_instance(dip);
	DMSGX(1, "[%d] Entered\n", instance);

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		return (DDI_FAILURE);
	}

	/*
	 * This function is called when vdc is detached or if it has failed to
	 * attach. In that case, the attach may have failed before the vdisk
	 * type has been set, so we can't call vdc_is_opened(). However, as
	 * the attach has failed, we know that the vdisk is not opened and we
	 * can safely detach.
	 */
	if (vdc->vdisk_type != VD_DISK_TYPE_UNK && vdc_is_opened(vdc)) {
		DMSG(vdc, 0, "[%d] Cannot detach: device is open", instance);
		return (DDI_FAILURE);
	}

	if (vdc->dkio_flush_pending) {
		DMSG(vdc, 0,
		    "[%d] Cannot detach: %d outstanding DKIO flushes\n",
		    instance, vdc->dkio_flush_pending);
		return (DDI_FAILURE);
	}

	if (vdc->validate_pending) {
		DMSG(vdc, 0,
		    "[%d] Cannot detach: %d outstanding validate request\n",
		    instance, vdc->validate_pending);
		return (DDI_FAILURE);
	}

	DMSG(vdc, 0, "[%d] proceeding...\n", instance);

	/* If we took ownership, release ownership */
	mutex_enter(&vdc->ownership_lock);
	if (vdc->ownership & VDC_OWNERSHIP_GRANTED) {
		rv = vdc_access_set(vdc, VD_ACCESS_SET_CLEAR, FKIOCTL);
		if (rv == 0) {
			vdc_ownership_update(vdc, VDC_OWNERSHIP_NONE);
		}
	}
	mutex_exit(&vdc->ownership_lock);

	/* mark instance as detaching */
	vdc->lifecycle	= VDC_LC_DETACHING;

	/*
	 * try and disable callbacks to prevent another handshake
	 */
	rv = ldc_set_cb_mode(vdc->ldc_handle, LDC_CB_DISABLE);
	DMSG(vdc, 0, "callback disabled (rv=%d)\n", rv);

	if (vdc->initialized & VDC_THREAD) {
		mutex_enter(&vdc->read_lock);
		if ((vdc->read_state == VDC_READ_WAITING) ||
		    (vdc->read_state == VDC_READ_RESET)) {
			vdc->read_state = VDC_READ_RESET;
			cv_signal(&vdc->read_cv);
		}

		mutex_exit(&vdc->read_lock);

		/* wake up any thread waiting for connection to come online */
		mutex_enter(&vdc->lock);
		if (vdc->state == VDC_STATE_INIT_WAITING) {
			DMSG(vdc, 0,
			    "[%d] write reset - move to resetting state...\n",
			    instance);
			vdc->state = VDC_STATE_RESETTING;
			cv_signal(&vdc->initwait_cv);
		}
		mutex_exit(&vdc->lock);

		/* now wait until state transitions to VDC_STATE_DETACH */
		thread_join(vdc->msg_proc_thr->t_did);
		ASSERT(vdc->state == VDC_STATE_DETACH);
		DMSG(vdc, 0, "[%d] Reset thread exit and join ..\n",
		    vdc->instance);
	}

	mutex_enter(&vdc->lock);

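	/*
	 * Tear down the remaining resources in roughly the reverse order
	 * of their setup, as tracked by the vdc->initialized bitmask.
	 */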
	if (vdc->initialized & VDC_DRING)
		vdc_destroy_descriptor_ring(vdc);

	if (vdc->initialized & VDC_LDC)
		vdc_terminate_ldc(vdc);

	if (vdc->failfast_thread) {
		failfast_tid = vdc->failfast_thread->t_did;
		vdc->failfast_interval = 0;
		cv_signal(&vdc->failfast_cv);
	} else {
		failfast_tid = 0;
	}

	if (vdc->ownership & VDC_OWNERSHIP_WANTED) {
		ownership_tid = vdc->ownership_thread->t_did;
		vdc->ownership = VDC_OWNERSHIP_NONE;
		cv_signal(&vdc->ownership_cv);
	} else {
		ownership_tid = 0;
	}

	mutex_exit(&vdc->lock);

	if (failfast_tid != 0)
		thread_join(failfast_tid);

	if (ownership_tid != 0)
		thread_join(ownership_tid);

	if (vdc->initialized & VDC_MINOR) {
		ddi_prop_remove_all(dip);
		ddi_remove_minor_node(dip, NULL);
	}

	if (vdc->io_stats) {
		kstat_delete(vdc->io_stats);
		vdc->io_stats = NULL;
	}

	if (vdc->err_stats) {
		kstat_delete(vdc->err_stats);
		vdc->err_stats = NULL;
	}

	if (vdc->initialized & VDC_LOCKS) {
		mutex_destroy(&vdc->lock);
		mutex_destroy(&vdc->read_lock);
		mutex_destroy(&vdc->ownership_lock);
		cv_destroy(&vdc->initwait_cv);
		cv_destroy(&vdc->dring_free_cv);
		cv_destroy(&vdc->membind_cv);
		cv_destroy(&vdc->sync_pending_cv);
		cv_destroy(&vdc->sync_blocked_cv);
		cv_destroy(&vdc->read_cv);
		cv_destroy(&vdc->running_cv);
		cv_destroy(&vdc->ownership_cv);
		cv_destroy(&vdc->failfast_cv);
		cv_destroy(&vdc->failfast_io_cv);
	}

	if (vdc->minfo)
		kmem_free(vdc->minfo, sizeof (struct dk_minfo));

	if (vdc->cinfo)
		kmem_free(vdc->cinfo, sizeof (struct dk_cinfo));

	if (vdc->vtoc)
		kmem_free(vdc->vtoc, sizeof (struct vtoc));

	if (vdc->geom)
		kmem_free(vdc->geom, sizeof (struct dk_geom));

	if (vdc->devid) {
		ddi_devid_unregister(dip);
		ddi_devid_free(vdc->devid);
	}

	if (vdc->initialized & VDC_SOFT_STATE)
		ddi_soft_state_free(vdc_state, instance);

	DMSG(vdc, 0, "[%d] End %p\n", instance, (void *)vdc);

	return (DDI_SUCCESS);
}


static int
vdc_do_attach(dev_info_t *dip)
{
	int		instance;
	vdc_t		*vdc = NULL;
	int		status;
	md_t		*mdp;
	mde_cookie_t	vd_node, vd_port;

	ASSERT(dip != NULL);

	instance = ddi_get_instance(dip);
	if (ddi_soft_state_zalloc(vdc_state, instance) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "[%d] Couldn't alloc state structure",
		    instance);
		return (DDI_FAILURE);
	}

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		return (DDI_FAILURE);
	}

	/*
	 * We assign the value to initialized in this case to zero out the
	 * variable and then set bits in it to indicate what has been done
	 */
	vdc->initialized = VDC_SOFT_STATE;

	vdc_hz_min_ldc_delay = drv_usectohz(vdc_min_timeout_ldc);
	vdc_hz_max_ldc_delay = drv_usectohz(vdc_max_timeout_ldc);

	vdc->dip	= dip;
	vdc->instance	= instance;
	vdc->vdisk_type	= VD_DISK_TYPE_UNK;
	vdc->vdisk_label = VD_DISK_LABEL_UNK;
	vdc->state	= VDC_STATE_INIT;
	vdc->lifecycle	= VDC_LC_ATTACHING;
	vdc->ldc_state	= 0;
	vdc->session_id = 0;
	vdc->block_size = DEV_BSIZE;
	vdc->max_xfer_sz = maxphys / DEV_BSIZE;

	/*
	 * We assume, for now, that the vDisk server will export 'read'
	 * operations to us at a minimum (this is needed because of checks
	 * in vdc for supported operations early in the handshake process).
	 * The vDisk server will return ENOTSUP if this is not the case.
	 * The value will be overwritten during the attribute exchange with
	 * the bitmask of operations exported by the server.
	 */
	vdc->operations = VD_OP_MASK_READ;

	vdc->vtoc = NULL;
	vdc->geom = NULL;
	vdc->cinfo = NULL;
	vdc->minfo = NULL;

	mutex_init(&vdc->lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&vdc->initwait_cv, NULL, CV_DRIVER, NULL);
	cv_init(&vdc->dring_free_cv, NULL, CV_DRIVER, NULL);
	cv_init(&vdc->membind_cv, NULL, CV_DRIVER, NULL);
	cv_init(&vdc->running_cv, NULL, CV_DRIVER, NULL);

	vdc->threads_pending = 0;
	vdc->sync_op_pending = B_FALSE;
	vdc->sync_op_blocked = B_FALSE;
	cv_init(&vdc->sync_pending_cv, NULL, CV_DRIVER, NULL);
	cv_init(&vdc->sync_blocked_cv, NULL, CV_DRIVER, NULL);

	mutex_init(&vdc->ownership_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&vdc->ownership_cv, NULL, CV_DRIVER, NULL);
	cv_init(&vdc->failfast_cv, NULL, CV_DRIVER, NULL);
	cv_init(&vdc->failfast_io_cv, NULL, CV_DRIVER, NULL);

	/* init blocking msg read functionality */
	mutex_init(&vdc->read_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&vdc->read_cv, NULL, CV_DRIVER, NULL);
	vdc->read_state = VDC_READ_IDLE;

	vdc->initialized |= VDC_LOCKS;

	/* get device and port MD node for this disk instance */
	if (vdc_get_md_node(dip, &mdp, &vd_node, &vd_port) != 0) {
		cmn_err(CE_NOTE, "[%d] Could not get machine description node",
		    instance);
		return (DDI_FAILURE);
	}

	/* set the connection timeout */
	if (vd_port == NULL || (md_get_prop_val(mdp, vd_port,
	    VDC_MD_TIMEOUT, &vdc->ctimeout) != 0)) {
		vdc->ctimeout = 0;
	}

	/* initialise LDC channel which will be used to communicate with vds */
	status = vdc_do_ldc_init(vdc, mdp, vd_node);

	(void) md_fini_handle(mdp);

	if (status != 0) {
		cmn_err(CE_NOTE, "[%d] Couldn't initialize LDC", instance);
		goto return_status;
	}

	/* initialize the thread responsible for managing state with server */
	vdc->msg_proc_thr = thread_create(NULL, 0, vdc_process_msg_thread,
	    vdc, 0, &p0, TS_RUN, minclsyspri);
	if (vdc->msg_proc_thr == NULL) {
		cmn_err(CE_NOTE, "[%d] Failed to create msg processing thread",
		    instance);
		return (DDI_FAILURE);
	}

	vdc->initialized |= VDC_THREAD;

	/* Create the kstats for saving the I/O statistics used by iostat(1M) */
	vdc_create_io_kstats(vdc);
	vdc_create_err_kstats(vdc);

	atomic_inc_32(&vdc_instance_count);

	/*
	 * Check the disk label. This will send requests and do the handshake.
	 * We don't really care about the disk label now. What we really need
	 * is the handshake to be done so that we know the type of the disk
	 * (slice or full disk) and the appropriate device nodes can be
	 * created.
	 */
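	/*
	 * These structures are filled in by vdc_validate_geometry() once
	 * the handshake with the vDisk server has completed.
	 */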
	vdc->vdisk_label = VD_DISK_LABEL_UNK;
	vdc->vtoc = kmem_zalloc(sizeof (struct vtoc), KM_SLEEP);
	vdc->geom = kmem_zalloc(sizeof (struct dk_geom), KM_SLEEP);
	vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP);

	mutex_enter(&vdc->lock);
	(void) vdc_validate_geometry(vdc);
	mutex_exit(&vdc->lock);

	/*
	 * Now that we have the device info we can create the
	 * device nodes and properties
	 */
	status = vdc_create_device_nodes(vdc);
	if (status) {
		DMSG(vdc, 0, "[%d] Failed to create device nodes",
		    instance);
		goto return_status;
	}
	status = vdc_create_device_nodes_props(vdc);
	if (status) {
		DMSG(vdc, 0, "[%d] Failed to create device nodes"
		    " properties (%d)", instance, status);
		goto return_status;
	}

	/*
	 * Setup devid
	 */
	if (vdc_setup_devid(vdc)) {
		DMSG(vdc, 0, "[%d] No device id available\n", instance);
	}

	/*
	 * Fill in the fields of the error statistics kstat that were not
	 * available when creating the kstat
	 */
	vdc_set_err_kstats(vdc);

	ddi_report_dev(dip);
	vdc->lifecycle	= VDC_LC_ONLINE;
	DMSG(vdc, 0, "[%d] Attach tasks successful\n", instance);

return_status:
	DMSG(vdc, 0, "[%d] Attach completed\n", instance);
	return (status);
}

static int
vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int	status;

	switch (cmd) {
	case DDI_ATTACH:
		if ((status = vdc_do_attach(dip)) != 0)
			(void) vdc_detach(dip, DDI_DETACH);
		return (status);
	case DDI_RESUME:
		/* nothing to do for this non-device */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}
}

static int
vdc_do_ldc_init(vdc_t *vdc, md_t *mdp, mde_cookie_t vd_node)
{
	int		status = 0;
	ldc_status_t	ldc_state;
	ldc_attr_t	ldc_attr;
	uint64_t	ldc_id = 0;

	ASSERT(vdc != NULL);

	vdc->initialized |= VDC_LDC;

	if ((status = vdc_get_ldc_id(mdp, vd_node, &ldc_id)) != 0) {
		DMSG(vdc, 0, "[%d] Failed to get LDC channel ID property",
		    vdc->instance);
		return (EIO);
	}

	DMSGX(0, "[%d] LDC id is 0x%lx\n", vdc->instance, ldc_id);

	vdc->ldc_id = ldc_id;

	ldc_attr.devclass = LDC_DEV_BLK;
	ldc_attr.instance = vdc->instance;
	ldc_attr.mode = LDC_MODE_UNRELIABLE;	/* unreliable transport */
	ldc_attr.mtu = VD_LDC_MTU;

	if ((vdc->initialized & VDC_LDC_INIT) == 0) {
		status = ldc_init(ldc_id, &ldc_attr, &vdc->ldc_handle);
		if (status != 0) {
			DMSG(vdc, 0, "[%d] ldc_init(chan %ld) returned %d",
			    vdc->instance, ldc_id, status);
			return (status);
		}
		vdc->initialized |= VDC_LDC_INIT;
	}
	status = ldc_status(vdc->ldc_handle, &ldc_state);
	if (status != 0) {
		DMSG(vdc, 0, "[%d] Cannot discover LDC status [err=%d]",
		    vdc->instance, status);
		return (status);
	}
	vdc->ldc_state = ldc_state;

	if ((vdc->initialized & VDC_LDC_CB) == 0) {
		status = ldc_reg_callback(vdc->ldc_handle, vdc_handle_cb,
		    (caddr_t)vdc);
		if (status != 0) {
			DMSG(vdc, 0, "[%d] LDC callback reg. failed (%d)",
			    vdc->instance, status);
			return (status);
		}
		vdc->initialized |= VDC_LDC_CB;
	}

	vdc->initialized |= VDC_LDC;

	/*
	 * At this stage we have initialised LDC; we will now try to open
	 * the connection.
	 */
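	/*
	 * ldc_open() only opens our end of the channel; the link itself
	 * is brought up later via ldc_up() (see vdc_do_ldc_up()).
	 */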
	if (vdc->ldc_state == LDC_INIT) {
		status = ldc_open(vdc->ldc_handle);
		if (status != 0) {
			DMSG(vdc, 0, "[%d] ldc_open(chan %ld) returned %d",
			    vdc->instance, vdc->ldc_id, status);
			return (status);
		}
		vdc->initialized |= VDC_LDC_OPEN;
	}

	return (status);
}

static int
vdc_start_ldc_connection(vdc_t *vdc)
{
	int	status = 0;

	ASSERT(vdc != NULL);

	ASSERT(MUTEX_HELD(&vdc->lock));

	status = vdc_do_ldc_up(vdc);

	DMSG(vdc, 0, "[%d] Finished bringing up LDC\n", vdc->instance);

	return (status);
}

static int
vdc_stop_ldc_connection(vdc_t *vdcp)
{
	int	status;

	DMSG(vdcp, 0, ": Resetting connection to vDisk server : state %d\n",
	    vdcp->state);

	status = ldc_down(vdcp->ldc_handle);
	DMSG(vdcp, 0, "ldc_down() = %d\n", status);

	vdcp->initialized &= ~VDC_HANDSHAKE;
	DMSG(vdcp, 0, "initialized=%x\n", vdcp->initialized);

	return (status);
}

static void
vdc_create_io_kstats(vdc_t *vdc)
{
	if (vdc->io_stats != NULL) {
		DMSG(vdc, 0, "[%d] I/O kstat already exists\n", vdc->instance);
		return;
	}

	vdc->io_stats = kstat_create(VDC_DRIVER_NAME, vdc->instance, NULL,
	    "disk", KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
	if (vdc->io_stats != NULL) {
		vdc->io_stats->ks_lock = &vdc->lock;
		kstat_install(vdc->io_stats);
	} else {
		cmn_err(CE_NOTE, "[%d] Failed to create kstat: I/O statistics"
		    " will not be gathered", vdc->instance);
	}
}

static void
vdc_create_err_kstats(vdc_t *vdc)
{
	vd_err_stats_t	*stp;
	char	kstatmodule_err[KSTAT_STRLEN];
	char	kstatname[KSTAT_STRLEN];
	int	ndata = (sizeof (vd_err_stats_t) / sizeof (kstat_named_t));
	int	instance = vdc->instance;

	if (vdc->err_stats != NULL) {
		DMSG(vdc, 0, "[%d] ERR kstat already exists\n", vdc->instance);
		return;
	}

	(void) snprintf(kstatmodule_err, sizeof (kstatmodule_err),
	    "%serr", VDC_DRIVER_NAME);
	(void) snprintf(kstatname, sizeof (kstatname),
	    "%s%d,err", VDC_DRIVER_NAME, instance);

	vdc->err_stats = kstat_create(kstatmodule_err, instance, kstatname,
	    "device_error", KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);

	if (vdc->err_stats == NULL) {
		cmn_err(CE_NOTE, "[%d] Failed to create kstat: Error statistics"
		    " will not be gathered", instance);
		return;
	}

	stp = (vd_err_stats_t *)vdc->err_stats->ks_data;
	kstat_named_init(&stp->vd_softerrs, "Soft Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&stp->vd_transerrs, "Transport Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&stp->vd_protoerrs, "Protocol Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&stp->vd_vid, "Vendor",
	    KSTAT_DATA_CHAR);
	kstat_named_init(&stp->vd_pid, "Product",
	    KSTAT_DATA_CHAR);
	kstat_named_init(&stp->vd_capacity, "Size",
	    KSTAT_DATA_ULONGLONG);

	vdc->err_stats->ks_update = nulldev;

	kstat_install(vdc->err_stats);
}

static void
vdc_set_err_kstats(vdc_t *vdc)
{
	vd_err_stats_t	*stp;

	if (vdc->err_stats == NULL)
		return;

	mutex_enter(&vdc->lock);

	stp = (vd_err_stats_t *)vdc->err_stats->ks_data;
	ASSERT(stp != NULL);

	stp->vd_capacity.value.ui64 = vdc->vdisk_size * vdc->block_size;
	(void) strcpy(stp->vd_vid.value.c, "SUN");
	(void) strcpy(stp->vd_pid.value.c, "VDSK");

	mutex_exit(&vdc->lock);
}

static int
vdc_create_device_nodes_efi(vdc_t *vdc)
{
	ddi_remove_minor_node(vdc->dip, "h");
	ddi_remove_minor_node(vdc->dip, "h,raw");

	if (ddi_create_minor_node(vdc->dip, "wd", S_IFBLK,
	    VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE),
	    DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd'",
		    vdc->instance);
		return (EIO);
	}

	/* if any device node is created we set this flag */
	vdc->initialized |= VDC_MINOR;

	if (ddi_create_minor_node(vdc->dip, "wd,raw", S_IFCHR,
	    VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE),
	    DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd,raw'",
		    vdc->instance);
		return (EIO);
	}

	return (0);
}

static int
vdc_create_device_nodes_vtoc(vdc_t *vdc)
{
	ddi_remove_minor_node(vdc->dip, "wd");
	ddi_remove_minor_node(vdc->dip, "wd,raw");

	if (ddi_create_minor_node(vdc->dip, "h", S_IFBLK,
	    VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE),
	    DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h'",
		    vdc->instance);
		return (EIO);
	}

	/* if any device node is created we set this flag */
	vdc->initialized |= VDC_MINOR;

	if (ddi_create_minor_node(vdc->dip, "h,raw", S_IFCHR,
	    VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE),
	    DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
		cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h,raw'",
		    vdc->instance);
		return (EIO);
	}

	return (0);
}

/*
 * Function:
 *	vdc_create_device_nodes
 *
 * Description:
 *	This function creates the block and character device nodes under
 *	/devices along with the node properties. It is called as part of
 *	the attach(9E) of the instance during the handshake with vds after
 *	vds has sent the attributes to vdc.
 *
 *	If the device is of type VD_DISK_TYPE_SLICE then minor node 2 is
 *	used, in keeping with the Solaris convention that slice 2 refers
 *	to a whole disk. Slices start at 'a'.
 *
 * Parameters:
 *	vdc - soft state pointer
 *
 * Return Values
 *	0	- Success
 *	EIO	- Failed to create node
 *	EINVAL	- Unknown type of disk exported
 */
static int
vdc_create_device_nodes(vdc_t *vdc)
{
	char		name[sizeof ("s,raw")];
	dev_info_t	*dip = NULL;
	int		instance, status;
	int		num_slices = 1;
	int		i;

	ASSERT(vdc != NULL);

	instance = vdc->instance;
	dip = vdc->dip;

	switch (vdc->vdisk_type) {
	case VD_DISK_TYPE_DISK:
		num_slices = V_NUMPAR;
		break;
	case VD_DISK_TYPE_SLICE:
		num_slices = 1;
		break;
	case VD_DISK_TYPE_UNK:
	default:
		return (EINVAL);
	}

	/*
	 * Minor nodes are different for EFI disks: EFI disks do not have
	 * a minor node 'g' for the minor number corresponding to slice
	 * VD_EFI_WD_SLICE (slice 7); instead they have a minor node 'wd'
	 * representing the whole disk.
	 */
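	/*
	 * Node names are the slice letter ('a' + slice number), with
	 * ",raw" appended for the character node.
	 */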
	for (i = 0; i < num_slices; i++) {

		if (i == VD_EFI_WD_SLICE) {
			if (vdc->vdisk_label == VD_DISK_LABEL_EFI)
				status = vdc_create_device_nodes_efi(vdc);
			else
				status = vdc_create_device_nodes_vtoc(vdc);
			if (status != 0)
				return (status);
			continue;
		}

		(void) snprintf(name, sizeof (name), "%c", 'a' + i);
		if (ddi_create_minor_node(dip, name, S_IFBLK,
		    VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
			cmn_err(CE_NOTE, "[%d] Couldn't add block node '%s'",
			    instance, name);
			return (EIO);
		}

		/* if any device node is created we set this flag */
		vdc->initialized |= VDC_MINOR;

		(void) snprintf(name, sizeof (name), "%c%s", 'a' + i, ",raw");

		if (ddi_create_minor_node(dip, name, S_IFCHR,
		    VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) {
			cmn_err(CE_NOTE, "[%d] Couldn't add raw node '%s'",
			    instance, name);
			return (EIO);
		}
	}

	return (0);
}

/*
 * Function:
 *	vdc_create_device_nodes_props
 *
 * Description:
 *	This function creates the block and character device nodes under
 *	/devices along with the node properties. It is called as part of
 *	the attach(9E) of the instance during the handshake with vds after
 *	vds has sent the attributes to vdc.
 *
 * Parameters:
 *	vdc - soft state pointer
 *
 * Return Values
 *	0	- Success
 *	EIO	- Failed to create device node property
 *	EINVAL	- Unknown type of disk exported
 */
static int
vdc_create_device_nodes_props(vdc_t *vdc)
{
	dev_info_t	*dip = NULL;
	int		instance;
	int		num_slices = 1;
	int64_t		size = 0;
	dev_t		dev;
	int		rv;
	int		i;

	ASSERT(vdc != NULL);

	instance = vdc->instance;
	dip = vdc->dip;

	switch (vdc->vdisk_type) {
	case VD_DISK_TYPE_DISK:
		num_slices = V_NUMPAR;
		break;
	case VD_DISK_TYPE_SLICE:
		num_slices = 1;
		break;
	case VD_DISK_TYPE_UNK:
	default:
		return (EINVAL);
	}

	if (vdc->vdisk_label == VD_DISK_LABEL_UNK) {
		/* remove all properties */
		for (i = 0; i < num_slices; i++) {
			dev = makedevice(ddi_driver_major(dip),
			    VD_MAKE_DEV(instance, i));
			(void) ddi_prop_remove(dev, dip, VDC_SIZE_PROP_NAME);
			(void) ddi_prop_remove(dev, dip, VDC_NBLOCKS_PROP_NAME);
		}
		return (0);
	}

	for (i = 0; i < num_slices; i++) {
		dev = makedevice(ddi_driver_major(dip),
		    VD_MAKE_DEV(instance, i));

		size = vdc->slice[i].nblocks * vdc->block_size;
		DMSG(vdc, 0, "[%d] sz %ld (%ld Mb)  p_size %lx\n",
		    instance, size, size / (1024 * 1024),
		    vdc->slice[i].nblocks);

		rv = ddi_prop_update_int64(dev, dip, VDC_SIZE_PROP_NAME, size);
		if (rv != DDI_PROP_SUCCESS) {
			cmn_err(CE_NOTE, "[%d] Couldn't add '%s' prop of [%ld]",
			    instance, VDC_SIZE_PROP_NAME, size);
			return (EIO);
		}

		rv = ddi_prop_update_int64(dev, dip, VDC_NBLOCKS_PROP_NAME,
		    lbtodb(size));
		if (rv != DDI_PROP_SUCCESS) {
			cmn_err(CE_NOTE, "[%d] Couldn't add '%s' prop [%llu]",
			    instance, VDC_NBLOCKS_PROP_NAME, lbtodb(size));
			return (EIO);
		}
	}

	return (0);
}

/*
 * Function:
 *	vdc_is_opened
 *
 * Description:
 *	This function checks if any slice of a given virtual disk is
 *	currently opened.
 *
 * Parameters:
 *	vdc - soft state pointer
 *
 * Return Values
 *	B_TRUE	- at least one slice is opened.
 *	B_FALSE	- no slice is opened.
 */
static boolean_t
vdc_is_opened(vdc_t *vdc)
{
	int i, nslices;

	switch (vdc->vdisk_type) {
	case VD_DISK_TYPE_DISK:
		nslices = V_NUMPAR;
		break;
	case VD_DISK_TYPE_SLICE:
		nslices = 1;
		break;
	case VD_DISK_TYPE_UNK:
	default:
		ASSERT(0);
	}

	/* check if there's any layered open */
	for (i = 0; i < nslices; i++) {
		if (vdc->open_lyr[i] > 0)
			return (B_TRUE);
	}

	/* check if there is any other kind of open */
	for (i = 0; i < OTYPCNT; i++) {
		if (vdc->open[i] != 0)
			return (B_TRUE);
	}

	return (B_FALSE);
}

static int
vdc_mark_opened(vdc_t *vdc, int slice, int flag, int otyp)
{
	uint8_t slicemask;
	int i;

	ASSERT(otyp < OTYPCNT);
	ASSERT(slice < V_NUMPAR);
	ASSERT(MUTEX_HELD(&vdc->lock));

	slicemask = 1 << slice;

	/* check if slice is already exclusively opened */
	if (vdc->open_excl & slicemask)
		return (EBUSY);

	/* if open exclusive, check if slice is already opened */
	if (flag & FEXCL) {
		if (vdc->open_lyr[slice] > 0)
			return (EBUSY);
		for (i = 0; i < OTYPCNT; i++) {
			if (vdc->open[i] & slicemask)
				return (EBUSY);
		}
		vdc->open_excl |= slicemask;
	}

	/* mark slice as opened */
	if (otyp == OTYP_LYR) {
		vdc->open_lyr[slice]++;
	} else {
		vdc->open[otyp] |= slicemask;
	}

	return (0);
}

static void
vdc_mark_closed(vdc_t *vdc, int slice, int flag, int otyp)
{
	uint8_t slicemask;

	ASSERT(otyp < OTYPCNT);
	ASSERT(slice < V_NUMPAR);
	ASSERT(MUTEX_HELD(&vdc->lock));

	slicemask = 1 << slice;

	if (otyp == OTYP_LYR) {
		ASSERT(vdc->open_lyr[slice] > 0);
		vdc->open_lyr[slice]--;
	} else {
		vdc->open[otyp] &= ~slicemask;
	}

	if (flag & FEXCL)
		vdc->open_excl &= ~slicemask;
}

static int
vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	int	instance, nodelay;
	int	slice, status = 0;
	vdc_t	*vdc;

	ASSERT(dev != NULL);
	instance = VDCUNIT(*dev);

	if (otyp >= OTYPCNT)
		return (EINVAL);

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		return (ENXIO);
	}

	DMSG(vdc, 0, "minor = %d flag = %x, otyp = %x\n",
	    getminor(*dev), flag, otyp);

	slice = VDCPART(*dev);

	nodelay = flag & (FNDELAY | FNONBLOCK);

	if ((flag & FWRITE) && (!nodelay) &&
	    !(VD_OP_SUPPORTED(vdc->operations, VD_OP_BWRITE))) {
		return (EROFS);
	}

	mutex_enter(&vdc->lock);

	status = vdc_mark_opened(vdc, slice, flag, otyp);

	if (status != 0) {
		mutex_exit(&vdc->lock);
		return (status);
	}

	if (nodelay) {

		/* don't resubmit a validate request if there's already one */
		if (vdc->validate_pending > 0) {
			mutex_exit(&vdc->lock);
			return (0);
		}

		/* call vdc_validate() asynchronously to avoid blocking */
		if (taskq_dispatch(system_taskq, vdc_validate_task,
		    (void *)vdc, TQ_NOSLEEP) == NULL) {
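			/* dispatch failed: undo the open recorded above */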
			vdc_mark_closed(vdc, slice, flag, otyp);
			mutex_exit(&vdc->lock);
			return (ENXIO);
		}

		vdc->validate_pending++;
		mutex_exit(&vdc->lock);
		return (0);
	}

	mutex_exit(&vdc->lock);

	vdc_validate(vdc);

	mutex_enter(&vdc->lock);

	if (vdc->vdisk_label == VD_DISK_LABEL_UNK ||
	    vdc->slice[slice].nblocks == 0) {
		vdc_mark_closed(vdc, slice, flag, otyp);
		status = EIO;
	}

	mutex_exit(&vdc->lock);

	return (status);
}

static int
vdc_close(dev_t dev, int flag, int otyp, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	int	instance;
	int	slice;
	int	rv, rval;
	vdc_t	*vdc;

	instance = VDCUNIT(dev);

	if (otyp >= OTYPCNT)
		return (EINVAL);

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		return (ENXIO);
	}

	DMSG(vdc, 0, "[%d] flag = %x, otyp = %x\n", instance, flag, otyp);

	slice = VDCPART(dev);

	/*
	 * Attempt to flush the W$ on a close operation. If this is
	 * not a supported IOCTL command or the backing device is read-only
	 * do not fail the close operation.
	 */
	rv = vd_process_ioctl(dev, DKIOCFLUSHWRITECACHE, NULL, FKIOCTL, &rval);

	if (rv != 0 && rv != ENOTSUP && rv != ENOTTY && rv != EROFS) {
		DMSG(vdc, 0, "[%d] flush failed with error %d on close\n",
		    instance, rv);
		return (EIO);
	}

	mutex_enter(&vdc->lock);
	vdc_mark_closed(vdc, slice, flag, otyp);
	mutex_exit(&vdc->lock);

	return (0);
}

static int
vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp)
{
	_NOTE(ARGUNUSED(credp))

	return (vd_process_ioctl(dev, cmd, (caddr_t)arg, mode, rvalp));
}

static int
vdc_print(dev_t dev, char *str)
{
	cmn_err(CE_NOTE, "vdc%d: %s", VDCUNIT(dev), str);
	return (0);
}

static int
vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk)
{
	int	rv;
	size_t	nbytes = nblk * DEV_BSIZE;
	int	instance = VDCUNIT(dev);
	vdc_t	*vdc = NULL;

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		return (ENXIO);
	}

	DMSG(vdc, 2, "[%d] dump %ld bytes at block 0x%lx : addr=0x%p\n",
	    instance, nbytes, blkno, (void *)addr);
	rv = vdc_send_request(vdc, VD_OP_BWRITE, addr, nbytes,
	    VDCPART(dev), blkno, CB_STRATEGY, 0, VIO_write_dir);
	if (rv) {
		DMSG(vdc, 0, "Failed to do a disk dump (err=%d)\n", rv);
		return (rv);
	}

	if (ddi_in_panic())
		(void) vdc_drain_response(vdc);

	DMSG(vdc, 0, "[%d] End\n", instance);

	return (0);
}

/* -------------------------------------------------------------------------- */

/*
 * Disk access routines
 *
 */

/*
 * vdc_strategy()
 *
 * Return Value:
 *	0:	As per strategy(9E), the strategy() function must return 0
 *		[ bioerror(9f) sets b_flags to the proper error code ]
 */
static int
vdc_strategy(struct buf *buf)
{
	int	rv = -1;
	vdc_t	*vdc = NULL;
	int	instance = VDCUNIT(buf->b_edev);
	int	op = (buf->b_flags & B_READ) ? VD_OP_BREAD : VD_OP_BWRITE;
	int	slice;

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		bioerror(buf, ENXIO);
		biodone(buf);
		return (0);
	}

	DMSG(vdc, 2, "[%d] %s %ld bytes at block %llx : b_addr=0x%p\n",
	    instance, (buf->b_flags & B_READ) ? "Read" : "Write",
	    buf->b_bcount, buf->b_lblkno, (void *)buf->b_un.b_addr);
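	/*
	 * bp_mapin() ensures the buffer has a kernel virtual address
	 * before it is handed to the LDC memory mapping code.
	 */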
	bp_mapin(buf);

	if ((long)buf->b_private == VD_SLICE_NONE) {
		/* I/O using an absolute disk offset */
		slice = VD_SLICE_NONE;
	} else {
		slice = VDCPART(buf->b_edev);
	}

	rv = vdc_send_request(vdc, op, (caddr_t)buf->b_un.b_addr,
	    buf->b_bcount, slice, buf->b_lblkno,
	    CB_STRATEGY, buf, (op == VD_OP_BREAD) ? VIO_read_dir :
	    VIO_write_dir);

	/*
	 * If the request was successfully sent, the strategy call returns and
	 * the ACK handler calls the bioxxx functions when the vDisk server is
	 * done; otherwise we handle the error here.
	 */
	if (rv) {
		DMSG(vdc, 0, "Failed to read/write (err=%d)\n", rv);
		bioerror(buf, rv);
		biodone(buf);
	}

	return (0);
}

/*
 * Function:
 *	vdc_min
 *
 * Description:
 *	Routine to limit the size of a data transfer. Used in
 *	conjunction with physio(9F).
 *
 * Arguments:
 *	bp - pointer to the indicated buf(9S) struct.
 *
 */
static void
vdc_min(struct buf *bufp)
{
	vdc_t	*vdc = NULL;
	int	instance = VDCUNIT(bufp->b_edev);

	vdc = ddi_get_soft_state(vdc_state, instance);
	VERIFY(vdc != NULL);

	if (bufp->b_bcount > (vdc->max_xfer_sz * vdc->block_size)) {
		bufp->b_bcount = vdc->max_xfer_sz * vdc->block_size;
	}
}

static int
vdc_read(dev_t dev, struct uio *uio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	DMSGX(1, "[%d] Entered", VDCUNIT(dev));
	return (physio(vdc_strategy, NULL, dev, B_READ, vdc_min, uio));
}

static int
vdc_write(dev_t dev, struct uio *uio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	DMSGX(1, "[%d] Entered", VDCUNIT(dev));
	return (physio(vdc_strategy, NULL, dev, B_WRITE, vdc_min, uio));
}

static int
vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	DMSGX(1, "[%d] Entered", VDCUNIT(dev));
	return (aphysio(vdc_strategy, anocancel, dev, B_READ, vdc_min, aio));
}

static int
vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	DMSGX(1, "[%d] Entered", VDCUNIT(dev));
	return (aphysio(vdc_strategy, anocancel, dev, B_WRITE, vdc_min, aio));
}


/* -------------------------------------------------------------------------- */

/*
 * Handshake support
 */


/*
 * Function:
 *	vdc_init_ver_negotiation()
 *
 * Description:
 *	Send a version negotiation message (VIO_VER_INFO) to the vDisk
 *	server proposing the given protocol version.
 *
 * Arguments:
 *	vdc - soft state pointer for this instance of the device driver.
 *	ver - protocol version to propose to the server.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver)
{
	vio_ver_msg_t	pkt;
	size_t		msglen = sizeof (pkt);
	int		status = -1;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	DMSG(vdc, 0, "[%d] Entered.\n", vdc->instance);

	/*
	 * set the Session ID to a unique value
	 * (the lower 32 bits of the clock tick)
	 */
	vdc->session_id = ((uint32_t)gettick() & 0xffffffff);
	DMSG(vdc, 0, "[%d] Set SID to 0x%lx\n", vdc->instance, vdc->session_id);

	pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
	pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
	pkt.tag.vio_subtype_env = VIO_VER_INFO;
	pkt.tag.vio_sid = vdc->session_id;
	pkt.dev_class = VDEV_DISK;
	pkt.ver_major = ver.major;
	pkt.ver_minor = ver.minor;

	status = vdc_send(vdc, (caddr_t)&pkt, &msglen);
	DMSG(vdc, 0, "[%d] Ver info sent (status = %d)\n",
	    vdc->instance, status);
	if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) {
		DMSG(vdc, 0, "[%d] Failed to send Ver negotiation info: "
		    "id(%lx) rv(%d) size(%ld)", vdc->instance, vdc->ldc_handle,
		    status, msglen);
		if (msglen != sizeof (vio_ver_msg_t))
			status = ENOMSG;
	}

	return (status);
}

/*
 * Function:
 *	vdc_ver_negotiation()
 *
 * Description:
 *	Initiate version negotiation with the vDisk server and process
 *	its response.
 *
 * Arguments:
 *	vdcp - soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_ver_negotiation(vdc_t *vdcp)
{
	vio_msg_t vio_msg;
	int status;

	if (status = vdc_init_ver_negotiation(vdcp, vdc_version[0]))
		return (status);

	/* release lock and wait for response */
	mutex_exit(&vdcp->lock);
	status = vdc_wait_for_response(vdcp, &vio_msg);
	mutex_enter(&vdcp->lock);
	if (status) {
		DMSG(vdcp, 0,
		    "[%d] Failed waiting for Ver negotiation response, rv(%d)",
		    vdcp->instance, status);
		return (status);
	}

	/* check type and sub_type ... */
	if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL ||
	    vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) {
		DMSG(vdcp, 0, "[%d] Invalid ver negotiation response\n",
		    vdcp->instance);
		return (EPROTO);
	}

	return (vdc_handle_ver_msg(vdcp, (vio_ver_msg_t *)&vio_msg));
}

/*
 * Function:
 *	vdc_init_attr_negotiation()
 *
 * Description:
 *	Send an attribute exchange message (VIO_ATTR_INFO) to the vDisk
 *	server describing this client's transfer parameters.
 *
 * Arguments:
 *	vdc - soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_init_attr_negotiation(vdc_t *vdc)
{
	vd_attr_msg_t	pkt;
	size_t		msglen = sizeof (pkt);
	int		status;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

	DMSG(vdc, 0, "[%d] entered\n", vdc->instance);

	/* fill in tag */
	pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
	pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
	pkt.tag.vio_subtype_env = VIO_ATTR_INFO;
	pkt.tag.vio_sid = vdc->session_id;
	/* fill in payload */
	pkt.max_xfer_sz = vdc->max_xfer_sz;
	pkt.vdisk_block_size = vdc->block_size;
	pkt.xfer_mode = VIO_DRING_MODE_V1_0;
	pkt.operations = 0;	/* server will set bits of valid operations */
	pkt.vdisk_type = 0;	/* server will set to valid device type */
	pkt.vdisk_media = 0;	/* server will set to valid media type */
	pkt.vdisk_size = 0;	/* server will set to valid size */

	status = vdc_send(vdc, (caddr_t)&pkt, &msglen);
	DMSG(vdc, 0, "Attr info sent (status = %d)\n", status);

	if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) {
		DMSG(vdc, 0, "[%d] Failed to send Attr negotiation info: "
		    "id(%lx) rv(%d) size(%ld)", vdc->instance, vdc->ldc_handle,
		    status, msglen);
		if (msglen != sizeof (vio_ver_msg_t))
			status = ENOMSG;
	}

	return (status);
}

/*
 * Function:
 *	vdc_attr_negotiation()
 *
 * Description:
 *	Initiate the attribute exchange with the vDisk server and process
 *	its response.
 *
 * Arguments:
 *	vdc - soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_attr_negotiation(vdc_t *vdcp)
{
	int status;
	vio_msg_t vio_msg;

	if (status = vdc_init_attr_negotiation(vdcp))
		return (status);

	/* release lock and wait for response */
	mutex_exit(&vdcp->lock);
	status = vdc_wait_for_response(vdcp, &vio_msg);
	mutex_enter(&vdcp->lock);
	if (status) {
		DMSG(vdcp, 0,
		    "[%d] Failed waiting for Attr negotiation response, rv(%d)",
		    vdcp->instance, status);
		return (status);
	}

	/* check type and sub_type ... */
	if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL ||
	    vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) {
		DMSG(vdcp, 0, "[%d] Invalid attr negotiation response\n",
		    vdcp->instance);
		return (EPROTO);
	}

	return (vdc_handle_attr_msg(vdcp, (vd_attr_msg_t *)&vio_msg));
}


/*
 * Function:
 *	vdc_init_dring_negotiate()
 *
 * Description:
 *	Set up the local descriptor ring and register it with the vDisk
 *	server (VIO_DRING_REG).
 *
 * Arguments:
 *	vdc - soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_init_dring_negotiate(vdc_t *vdc)
{
	vio_dring_reg_msg_t pkt;
	size_t	msglen = sizeof (pkt);
	int	status = -1;
	int	retry;
	int	nretries = 10;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));

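	/*
	 * Descriptor ring initialization can transiently fail with EAGAIN,
	 * so it is retried a bounded number of times, with a short wait
	 * between attempts, before giving up.
	 */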
	for (retry = 0; retry < nretries; retry++) {
		status = vdc_init_descriptor_ring(vdc);
		if (status != EAGAIN)
			break;
		drv_usecwait(vdc_min_timeout_ldc);
	}

	if (status != 0) {
		DMSG(vdc, 0, "[%d] Failed to init DRing (status = %d)\n",
		    vdc->instance, status);
		return (status);
	}

	DMSG(vdc, 0, "[%d] Init of descriptor ring completed (status = %d)\n",
	    vdc->instance, status);

	/* fill in tag */
	pkt.tag.vio_msgtype = VIO_TYPE_CTRL;
	pkt.tag.vio_subtype = VIO_SUBTYPE_INFO;
	pkt.tag.vio_subtype_env = VIO_DRING_REG;
	pkt.tag.vio_sid = vdc->session_id;
	/* fill in payload */
	pkt.dring_ident = 0;
	pkt.num_descriptors = vdc->dring_len;
	pkt.descriptor_size = vdc->dring_entry_size;
	pkt.options = (VIO_TX_DRING | VIO_RX_DRING);
	pkt.ncookies = vdc->dring_cookie_count;
	pkt.cookie[0] = vdc->dring_cookie[0];	/* for now just one cookie */

	status = vdc_send(vdc, (caddr_t)&pkt, &msglen);
	if (status != 0) {
		DMSG(vdc, 0, "[%d] Failed to register DRing (err = %d)",
		    vdc->instance, status);
	}

	return (status);
}


/*
 * Function:
 *	vdc_dring_negotiation()
 *
 * Description:
 *	Initiate the descriptor ring registration with the vDisk server
 *	and process its response.
 *
 * Arguments:
 *	vdc - soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_dring_negotiation(vdc_t *vdcp)
{
	int status;
	vio_msg_t vio_msg;

	if (status = vdc_init_dring_negotiate(vdcp))
		return (status);

	/* release lock and wait for response */
	mutex_exit(&vdcp->lock);
	status = vdc_wait_for_response(vdcp, &vio_msg);
	mutex_enter(&vdcp->lock);
	if (status) {
		DMSG(vdcp, 0,
		    "[%d] Failed waiting for Dring negotiation response,"
		    " rv(%d)", vdcp->instance, status);
		return (status);
	}

	/* check type and sub_type ... */
	if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL ||
	    vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) {
		DMSG(vdcp, 0, "[%d] Invalid Dring negotiation response\n",
		    vdcp->instance);
		return (EPROTO);
	}

	return (vdc_handle_dring_reg_msg(vdcp,
	    (vio_dring_reg_msg_t *)&vio_msg));
}


/*
 * Function:
 *	vdc_send_rdx()
 *
 * Description:
 *	Send an RDX message to the vDisk server to indicate that the
 *	handshake is complete and vdc is ready to send data.
 *
 * Arguments:
 *	vdc - soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_send_rdx(vdc_t *vdcp)
{
	vio_msg_t	msg;
	size_t		msglen = sizeof (vio_msg_t);
	int		status;

	/*
	 * Send an RDX message to vds to indicate we are ready
	 * to send data
	 */
	msg.tag.vio_msgtype = VIO_TYPE_CTRL;
	msg.tag.vio_subtype = VIO_SUBTYPE_INFO;
	msg.tag.vio_subtype_env = VIO_RDX;
	msg.tag.vio_sid = vdcp->session_id;
	status = vdc_send(vdcp, (caddr_t)&msg, &msglen);
	if (status != 0) {
		DMSG(vdcp, 0, "[%d] Failed to send RDX message (%d)",
		    vdcp->instance, status);
	}

	return (status);
}

/*
 * Function:
 *	vdc_handle_rdx()
 *
 * Description:
 *	Sanity-check the RDX acknowledgement received from the vDisk
 *	server.
 *
 * Arguments:
 *	vdc - soft state pointer for this instance of the device driver.
 *	msgp - received msg
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_handle_rdx(vdc_t *vdcp, vio_rdx_msg_t *msgp)
{
	_NOTE(ARGUNUSED(vdcp))
	_NOTE(ARGUNUSED(msgp))

	ASSERT(msgp->tag.vio_msgtype == VIO_TYPE_CTRL);
	ASSERT(msgp->tag.vio_subtype == VIO_SUBTYPE_ACK);
	ASSERT(msgp->tag.vio_subtype_env == VIO_RDX);

	DMSG(vdcp, 1, "[%d] Got an RDX msg", vdcp->instance);

	return (0);
}

/*
 * Function:
 *	vdc_rdx_exchange()
 *
 * Description:
 *	Complete the handshake by sending an RDX message to the vDisk
 *	server and waiting for its acknowledgement.
 *
 * Arguments:
 *	vdc - soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_rdx_exchange(vdc_t *vdcp)
{
	int status;
	vio_msg_t vio_msg;

	if (status = vdc_send_rdx(vdcp))
		return (status);

	/* release lock and wait for response */
	mutex_exit(&vdcp->lock);
	status = vdc_wait_for_response(vdcp, &vio_msg);
	mutex_enter(&vdcp->lock);
	if (status) {
		DMSG(vdcp, 0, "[%d] Failed waiting for RDX response, rv(%d)",
		    vdcp->instance, status);
		return (status);
	}

	/* check type and sub_type ... */
	if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL ||
	    vio_msg.tag.vio_subtype != VIO_SUBTYPE_ACK) {
		DMSG(vdcp, 0, "[%d] Invalid RDX response\n", vdcp->instance);
		return (EPROTO);
	}

	return (vdc_handle_rdx(vdcp, (vio_rdx_msg_t *)&vio_msg));
}


/* -------------------------------------------------------------------------- */

/*
 * LDC helper routines
 */

static int
vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp)
{
	int		status;
	boolean_t	q_has_pkts = B_FALSE;
	uint64_t	delay_time;
	size_t		len;

	mutex_enter(&vdc->read_lock);

	if (vdc->read_state == VDC_READ_IDLE)
		vdc->read_state = VDC_READ_WAITING;

	while (vdc->read_state != VDC_READ_PENDING) {

		/* detect if the connection has been reset */
		if (vdc->read_state == VDC_READ_RESET) {
			status = ECONNRESET;
			goto done;
		}

		cv_wait(&vdc->read_cv, &vdc->read_lock);
	}

	/*
	 * Until we get a blocking ldc read we have to retry
	 * until the entire LDC message has arrived before
	 * ldc_read() will succeed. Note we also bail out if
	 * the channel is reset or goes away.
	 */
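	/* Poll ldc_read() with geometric backoff until the message arrives. */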
	 */
	delay_time = vdc_ldc_read_init_delay;
loop:
	len = *nbytesp;
	status = ldc_read(vdc->ldc_handle, (caddr_t)msgp, &len);
	switch (status) {
	case EAGAIN:
		delay_time *= 2;
		if (delay_time >= vdc_ldc_read_max_delay)
			delay_time = vdc_ldc_read_max_delay;
		delay(delay_time);
		goto loop;

	case 0:
		if (len == 0) {
			DMSG(vdc, 1, "[%d] ldc_read returned 0 bytes with "
			    "no error!\n", vdc->instance);
			goto loop;
		}

		*nbytesp = len;

		/*
		 * If there are pending messages, leave the
		 * read state as pending. Otherwise, set the state
		 * back to idle.
		 */
		status = ldc_chkq(vdc->ldc_handle, &q_has_pkts);
		if (status == 0 && !q_has_pkts)
			vdc->read_state = VDC_READ_IDLE;

		break;
	default:
		DMSG(vdc, 0, "ldc_read returned %d\n", status);
		break;
	}

done:
	mutex_exit(&vdc->read_lock);

	return (status);
}



#ifdef DEBUG
void
vdc_decode_tag(vdc_t *vdcp, vio_msg_t *msg)
{
	char *ms, *ss, *ses;
	switch (msg->tag.vio_msgtype) {
#define	Q(_s)	case _s : ms = #_s; break;
	Q(VIO_TYPE_CTRL)
	Q(VIO_TYPE_DATA)
	Q(VIO_TYPE_ERR)
#undef Q
	default: ms = "unknown"; break;
	}

	switch (msg->tag.vio_subtype) {
#define	Q(_s)	case _s : ss = #_s; break;
	Q(VIO_SUBTYPE_INFO)
	Q(VIO_SUBTYPE_ACK)
	Q(VIO_SUBTYPE_NACK)
#undef Q
	default: ss = "unknown"; break;
	}

	switch (msg->tag.vio_subtype_env) {
#define	Q(_s)	case _s : ses = #_s; break;
	Q(VIO_VER_INFO)
	Q(VIO_ATTR_INFO)
	Q(VIO_DRING_REG)
	Q(VIO_DRING_UNREG)
	Q(VIO_RDX)
	Q(VIO_PKT_DATA)
	Q(VIO_DESC_DATA)
	Q(VIO_DRING_DATA)
#undef Q
	default: ses = "unknown"; break;
	}

	DMSG(vdcp, 3, "(%x/%x/%x) message : (%s/%s/%s)\n",
	    msg->tag.vio_msgtype, msg->tag.vio_subtype,
	    msg->tag.vio_subtype_env, ms, ss, ses);
}
#endif

/*
 * Function:
 *	vdc_send()
 *
 * Description:
 *	The function encapsulates the call to write a message using LDC.
 *	If LDC indicates that the call failed due to the queue being full,
 *	we retry the ldc_write(), otherwise we return the error returned by LDC.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver;
 *		  the message is written over this instance's LDC channel.
 *	pkt	- address of LDC message to be sent
 *	msglen	- the size of the message being sent. When the function
 *		  returns, this contains the number of bytes written.
 *
 * Return Code:
 *	0	- Success.
 *	EINVAL	- pkt or msglen were NULL
 *	ECONNRESET - The connection was not up.
 *	EWOULDBLOCK - LDC queue is full
 *	xxx	- other error codes returned by ldc_write
 */
static int
vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen)
{
	size_t	size = 0;
	int status = 0;
	clock_t delay_ticks;

	ASSERT(vdc != NULL);
	ASSERT(mutex_owned(&vdc->lock));
	ASSERT(msglen != NULL);
	ASSERT(*msglen != 0);

#ifdef DEBUG
	vdc_decode_tag(vdc, (vio_msg_t *)(uintptr_t)pkt);
#endif
	/*
	 * Wait indefinitely to send if channel
	 * is busy, but bail out if we succeed or
	 * if the channel closes or is reset.
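	 *
	 * The same geometric backoff as in vdc_recv() is used here:
	 * delay_ticks doubles after every EWOULDBLOCK, capped at
	 * vdc_hz_max_ldc_delay, so a persistently full queue is polled
	 * progressively less often.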
2165 */ 2166 delay_ticks = vdc_hz_min_ldc_delay; 2167 do { 2168 size = *msglen; 2169 status = ldc_write(vdc->ldc_handle, pkt, &size); 2170 if (status == EWOULDBLOCK) { 2171 delay(delay_ticks); 2172 /* geometric backoff */ 2173 delay_ticks *= 2; 2174 if (delay_ticks > vdc_hz_max_ldc_delay) 2175 delay_ticks = vdc_hz_max_ldc_delay; 2176 } 2177 } while (status == EWOULDBLOCK); 2178 2179 /* if LDC had serious issues --- reset vdc state */ 2180 if (status == EIO || status == ECONNRESET) { 2181 /* LDC had serious issues --- reset vdc state */ 2182 mutex_enter(&vdc->read_lock); 2183 if ((vdc->read_state == VDC_READ_WAITING) || 2184 (vdc->read_state == VDC_READ_RESET)) 2185 cv_signal(&vdc->read_cv); 2186 vdc->read_state = VDC_READ_RESET; 2187 mutex_exit(&vdc->read_lock); 2188 2189 /* wake up any waiters in the reset thread */ 2190 if (vdc->state == VDC_STATE_INIT_WAITING) { 2191 DMSG(vdc, 0, "[%d] write reset - " 2192 "vdc is resetting ..\n", vdc->instance); 2193 vdc->state = VDC_STATE_RESETTING; 2194 cv_signal(&vdc->initwait_cv); 2195 } 2196 2197 return (ECONNRESET); 2198 } 2199 2200 /* return the last size written */ 2201 *msglen = size; 2202 2203 return (status); 2204 } 2205 2206 /* 2207 * Function: 2208 * vdc_get_md_node 2209 * 2210 * Description: 2211 * Get the MD, the device node and the port node for the given 2212 * disk instance. The caller is responsible for cleaning up the 2213 * reference to the returned MD (mdpp) by calling md_fini_handle(). 2214 * 2215 * Arguments: 2216 * dip - dev info pointer for this instance of the device driver. 2217 * mdpp - the returned MD. 2218 * vd_nodep - the returned device node. 2219 * vd_portp - the returned port node. The returned port node is NULL 2220 * if no port node is found. 2221 * 2222 * Return Code: 2223 * 0 - Success. 2224 * ENOENT - Expected node or property did not exist. 2225 * ENXIO - Unexpected error communicating with MD framework 2226 */ 2227 static int 2228 vdc_get_md_node(dev_info_t *dip, md_t **mdpp, mde_cookie_t *vd_nodep, 2229 mde_cookie_t *vd_portp) 2230 { 2231 int status = ENOENT; 2232 char *node_name = NULL; 2233 md_t *mdp = NULL; 2234 int num_nodes; 2235 int num_vdevs; 2236 int num_vports; 2237 mde_cookie_t rootnode; 2238 mde_cookie_t *listp = NULL; 2239 boolean_t found_inst = B_FALSE; 2240 int listsz; 2241 int idx; 2242 uint64_t md_inst; 2243 int obp_inst; 2244 int instance = ddi_get_instance(dip); 2245 2246 /* 2247 * Get the OBP instance number for comparison with the MD instance 2248 * 2249 * The "cfg-handle" property of a vdc node in an MD contains the MD's 2250 * notion of "instance", or unique identifier, for that node; OBP 2251 * stores the value of the "cfg-handle" MD property as the value of 2252 * the "reg" property on the node in the device tree it builds from 2253 * the MD and passes to Solaris. Thus, we look up the devinfo node's 2254 * "reg" property value to uniquely identify this device instance. 2255 * If the "reg" property cannot be found, the device tree state is 2256 * presumably so broken that there is no point in continuing. 2257 */ 2258 if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, OBP_REG)) { 2259 cmn_err(CE_WARN, "'%s' property does not exist", OBP_REG); 2260 return (ENOENT); 2261 } 2262 obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, 2263 OBP_REG, -1); 2264 DMSGX(1, "[%d] OBP inst=%d\n", instance, obp_inst); 2265 2266 /* 2267 * We now walk the MD nodes to find the node for this vdisk. 
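	 *
	 * For example (illustrative): if this devinfo node's "reg" property
	 * is 1, we scan the MD for the virtual-device disk node whose
	 * "cfg-handle" property is also 1; that MD node (and its port node)
	 * is what the rest of attach processing uses.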
2268 */ 2269 if ((mdp = md_get_handle()) == NULL) { 2270 cmn_err(CE_WARN, "unable to init machine description"); 2271 return (ENXIO); 2272 } 2273 2274 num_nodes = md_node_count(mdp); 2275 ASSERT(num_nodes > 0); 2276 2277 listsz = num_nodes * sizeof (mde_cookie_t); 2278 2279 /* allocate memory for nodes */ 2280 listp = kmem_zalloc(listsz, KM_SLEEP); 2281 2282 rootnode = md_root_node(mdp); 2283 ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE); 2284 2285 /* 2286 * Search for all the virtual devices, we will then check to see which 2287 * ones are disk nodes. 2288 */ 2289 num_vdevs = md_scan_dag(mdp, rootnode, 2290 md_find_name(mdp, VDC_MD_VDEV_NAME), 2291 md_find_name(mdp, "fwd"), listp); 2292 2293 if (num_vdevs <= 0) { 2294 cmn_err(CE_NOTE, "No '%s' node found", VDC_MD_VDEV_NAME); 2295 status = ENOENT; 2296 goto done; 2297 } 2298 2299 DMSGX(1, "[%d] num_vdevs=%d\n", instance, num_vdevs); 2300 for (idx = 0; idx < num_vdevs; idx++) { 2301 status = md_get_prop_str(mdp, listp[idx], "name", &node_name); 2302 if ((status != 0) || (node_name == NULL)) { 2303 cmn_err(CE_NOTE, "Unable to get name of node type '%s'" 2304 ": err %d", VDC_MD_VDEV_NAME, status); 2305 continue; 2306 } 2307 2308 DMSGX(1, "[%d] Found node '%s'\n", instance, node_name); 2309 if (strcmp(VDC_MD_DISK_NAME, node_name) == 0) { 2310 status = md_get_prop_val(mdp, listp[idx], 2311 VDC_MD_CFG_HDL, &md_inst); 2312 DMSGX(1, "[%d] vdc inst in MD=%lx\n", 2313 instance, md_inst); 2314 if ((status == 0) && (md_inst == obp_inst)) { 2315 found_inst = B_TRUE; 2316 break; 2317 } 2318 } 2319 } 2320 2321 if (!found_inst) { 2322 DMSGX(0, "Unable to find correct '%s' node", VDC_MD_DISK_NAME); 2323 status = ENOENT; 2324 goto done; 2325 } 2326 DMSGX(0, "[%d] MD inst=%lx\n", instance, md_inst); 2327 2328 *vd_nodep = listp[idx]; 2329 *mdpp = mdp; 2330 2331 num_vports = md_scan_dag(mdp, *vd_nodep, 2332 md_find_name(mdp, VDC_MD_PORT_NAME), 2333 md_find_name(mdp, "fwd"), listp); 2334 2335 if (num_vports != 1) { 2336 DMSGX(0, "Expected 1 '%s' node for '%s' port, found %d\n", 2337 VDC_MD_PORT_NAME, VDC_MD_VDEV_NAME, num_vports); 2338 } 2339 2340 *vd_portp = (num_vports == 0)? NULL: listp[0]; 2341 2342 done: 2343 kmem_free(listp, listsz); 2344 return (status); 2345 } 2346 2347 /* 2348 * Function: 2349 * vdc_get_ldc_id() 2350 * 2351 * Description: 2352 * This function gets the 'ldc-id' for this particular instance of vdc. 2353 * The id returned is the guest domain channel endpoint LDC uses for 2354 * communication with vds. 2355 * 2356 * Arguments: 2357 * mdp - pointer to the machine description. 2358 * vd_node - the vdisk element from the MD. 2359 * ldc_id - pointer to variable used to return the 'ldc-id' found. 2360 * 2361 * Return Code: 2362 * 0 - Success. 2363 * ENOENT - Expected node or property did not exist. 
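 *
 * A minimal usage sketch (illustrative only, error handling elided):
 *
 *	uint64_t ldc_id;
 *
 *	if (vdc_get_ldc_id(mdp, vd_node, &ldc_id) == 0)
 *		vdc->ldc_id = ldc_id;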
2364 */ 2365 static int 2366 vdc_get_ldc_id(md_t *mdp, mde_cookie_t vd_node, uint64_t *ldc_id) 2367 { 2368 mde_cookie_t *chanp = NULL; 2369 int listsz; 2370 int num_chans; 2371 int num_nodes; 2372 int status = 0; 2373 2374 num_nodes = md_node_count(mdp); 2375 ASSERT(num_nodes > 0); 2376 2377 listsz = num_nodes * sizeof (mde_cookie_t); 2378 2379 /* allocate memory for nodes */ 2380 chanp = kmem_zalloc(listsz, KM_SLEEP); 2381 2382 /* get the channels for this node */ 2383 num_chans = md_scan_dag(mdp, vd_node, 2384 md_find_name(mdp, VDC_MD_CHAN_NAME), 2385 md_find_name(mdp, "fwd"), chanp); 2386 2387 /* expecting at least one channel */ 2388 if (num_chans <= 0) { 2389 cmn_err(CE_NOTE, "No '%s' node for '%s' port", 2390 VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME); 2391 status = ENOENT; 2392 goto done; 2393 2394 } else if (num_chans != 1) { 2395 DMSGX(0, "Expected 1 '%s' node for '%s' port, found %d\n", 2396 VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME, num_chans); 2397 } 2398 2399 /* 2400 * We use the first channel found (index 0), irrespective of how 2401 * many are there in total. 2402 */ 2403 if (md_get_prop_val(mdp, chanp[0], VDC_MD_ID, ldc_id) != 0) { 2404 cmn_err(CE_NOTE, "Channel '%s' property not found", VDC_MD_ID); 2405 status = ENOENT; 2406 } 2407 2408 done: 2409 kmem_free(chanp, listsz); 2410 return (status); 2411 } 2412 2413 static int 2414 vdc_do_ldc_up(vdc_t *vdc) 2415 { 2416 int status; 2417 ldc_status_t ldc_state; 2418 2419 DMSG(vdc, 0, "[%d] Bringing up channel %lx\n", 2420 vdc->instance, vdc->ldc_id); 2421 2422 if (vdc->lifecycle == VDC_LC_DETACHING) 2423 return (EINVAL); 2424 2425 if ((status = ldc_up(vdc->ldc_handle)) != 0) { 2426 switch (status) { 2427 case ECONNREFUSED: /* listener not ready at other end */ 2428 DMSG(vdc, 0, "[%d] ldc_up(%lx,...) return %d\n", 2429 vdc->instance, vdc->ldc_id, status); 2430 status = 0; 2431 break; 2432 default: 2433 DMSG(vdc, 0, "[%d] Failed to bring up LDC: " 2434 "channel=%ld, err=%d", vdc->instance, vdc->ldc_id, 2435 status); 2436 break; 2437 } 2438 } 2439 2440 if (ldc_status(vdc->ldc_handle, &ldc_state) == 0) { 2441 vdc->ldc_state = ldc_state; 2442 if (ldc_state == LDC_UP) { 2443 DMSG(vdc, 0, "[%d] LDC channel already up\n", 2444 vdc->instance); 2445 vdc->seq_num = 1; 2446 vdc->seq_num_reply = 0; 2447 } 2448 } 2449 2450 return (status); 2451 } 2452 2453 /* 2454 * Function: 2455 * vdc_terminate_ldc() 2456 * 2457 * Description: 2458 * 2459 * Arguments: 2460 * vdc - soft state pointer for this instance of the device driver. 
2461 * 2462 * Return Code: 2463 * None 2464 */ 2465 static void 2466 vdc_terminate_ldc(vdc_t *vdc) 2467 { 2468 int instance = ddi_get_instance(vdc->dip); 2469 2470 ASSERT(vdc != NULL); 2471 ASSERT(mutex_owned(&vdc->lock)); 2472 2473 DMSG(vdc, 0, "[%d] initialized=%x\n", instance, vdc->initialized); 2474 2475 if (vdc->initialized & VDC_LDC_OPEN) { 2476 DMSG(vdc, 0, "[%d] ldc_close()\n", instance); 2477 (void) ldc_close(vdc->ldc_handle); 2478 } 2479 if (vdc->initialized & VDC_LDC_CB) { 2480 DMSG(vdc, 0, "[%d] ldc_unreg_callback()\n", instance); 2481 (void) ldc_unreg_callback(vdc->ldc_handle); 2482 } 2483 if (vdc->initialized & VDC_LDC) { 2484 DMSG(vdc, 0, "[%d] ldc_fini()\n", instance); 2485 (void) ldc_fini(vdc->ldc_handle); 2486 vdc->ldc_handle = NULL; 2487 } 2488 2489 vdc->initialized &= ~(VDC_LDC | VDC_LDC_CB | VDC_LDC_OPEN); 2490 } 2491 2492 /* -------------------------------------------------------------------------- */ 2493 2494 /* 2495 * Descriptor Ring helper routines 2496 */ 2497 2498 /* 2499 * Function: 2500 * vdc_init_descriptor_ring() 2501 * 2502 * Description: 2503 * 2504 * Arguments: 2505 * vdc - soft state pointer for this instance of the device driver. 2506 * 2507 * Return Code: 2508 * 0 - Success 2509 */ 2510 static int 2511 vdc_init_descriptor_ring(vdc_t *vdc) 2512 { 2513 vd_dring_entry_t *dep = NULL; /* DRing Entry pointer */ 2514 int status = 0; 2515 int i; 2516 2517 DMSG(vdc, 0, "[%d] initialized=%x\n", vdc->instance, vdc->initialized); 2518 2519 ASSERT(vdc != NULL); 2520 ASSERT(mutex_owned(&vdc->lock)); 2521 ASSERT(vdc->ldc_handle != NULL); 2522 2523 /* ensure we have enough room to store max sized block */ 2524 ASSERT(maxphys <= VD_MAX_BLOCK_SIZE); 2525 2526 if ((vdc->initialized & VDC_DRING_INIT) == 0) { 2527 DMSG(vdc, 0, "[%d] ldc_mem_dring_create\n", vdc->instance); 2528 /* 2529 * Calculate the maximum block size we can transmit using one 2530 * Descriptor Ring entry from the attributes returned by the 2531 * vDisk server. This is subject to a minimum of 'maxphys' 2532 * as we do not have the capability to split requests over 2533 * multiple DRing entries. 
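		 *
		 * Worked example (illustrative values): if max_xfer_sz *
		 * block_size were 128K with an 8K PAGESIZE, each entry
		 * would carry 128K / 8K = 16 cookies; if the product were
		 * smaller than maxphys, we would fall back to
		 * maxphys / PAGESIZE cookies per entry instead.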
2534 */ 2535 if ((vdc->max_xfer_sz * vdc->block_size) < maxphys) { 2536 DMSG(vdc, 0, "[%d] using minimum DRing size\n", 2537 vdc->instance); 2538 vdc->dring_max_cookies = maxphys / PAGESIZE; 2539 } else { 2540 vdc->dring_max_cookies = 2541 (vdc->max_xfer_sz * vdc->block_size) / PAGESIZE; 2542 } 2543 vdc->dring_entry_size = (sizeof (vd_dring_entry_t) + 2544 (sizeof (ldc_mem_cookie_t) * 2545 (vdc->dring_max_cookies - 1))); 2546 vdc->dring_len = VD_DRING_LEN; 2547 2548 status = ldc_mem_dring_create(vdc->dring_len, 2549 vdc->dring_entry_size, &vdc->ldc_dring_hdl); 2550 if ((vdc->ldc_dring_hdl == NULL) || (status != 0)) { 2551 DMSG(vdc, 0, "[%d] Descriptor ring creation failed", 2552 vdc->instance); 2553 return (status); 2554 } 2555 vdc->initialized |= VDC_DRING_INIT; 2556 } 2557 2558 if ((vdc->initialized & VDC_DRING_BOUND) == 0) { 2559 DMSG(vdc, 0, "[%d] ldc_mem_dring_bind\n", vdc->instance); 2560 vdc->dring_cookie = 2561 kmem_zalloc(sizeof (ldc_mem_cookie_t), KM_SLEEP); 2562 2563 status = ldc_mem_dring_bind(vdc->ldc_handle, vdc->ldc_dring_hdl, 2564 LDC_SHADOW_MAP|LDC_DIRECT_MAP, LDC_MEM_RW, 2565 &vdc->dring_cookie[0], 2566 &vdc->dring_cookie_count); 2567 if (status != 0) { 2568 DMSG(vdc, 0, "[%d] Failed to bind descriptor ring " 2569 "(%lx) to channel (%lx) status=%d\n", 2570 vdc->instance, vdc->ldc_dring_hdl, 2571 vdc->ldc_handle, status); 2572 return (status); 2573 } 2574 ASSERT(vdc->dring_cookie_count == 1); 2575 vdc->initialized |= VDC_DRING_BOUND; 2576 } 2577 2578 status = ldc_mem_dring_info(vdc->ldc_dring_hdl, &vdc->dring_mem_info); 2579 if (status != 0) { 2580 DMSG(vdc, 0, 2581 "[%d] Failed to get info for descriptor ring (%lx)\n", 2582 vdc->instance, vdc->ldc_dring_hdl); 2583 return (status); 2584 } 2585 2586 if ((vdc->initialized & VDC_DRING_LOCAL) == 0) { 2587 DMSG(vdc, 0, "[%d] local dring\n", vdc->instance); 2588 2589 /* Allocate the local copy of this dring */ 2590 vdc->local_dring = 2591 kmem_zalloc(vdc->dring_len * sizeof (vdc_local_desc_t), 2592 KM_SLEEP); 2593 vdc->initialized |= VDC_DRING_LOCAL; 2594 } 2595 2596 /* 2597 * Mark all DRing entries as free and initialize the private 2598 * descriptor's memory handles. If any entry is initialized, 2599 * we need to free it later so we set the bit in 'initialized' 2600 * at the start. 2601 */ 2602 vdc->initialized |= VDC_DRING_ENTRY; 2603 for (i = 0; i < vdc->dring_len; i++) { 2604 dep = VDC_GET_DRING_ENTRY_PTR(vdc, i); 2605 dep->hdr.dstate = VIO_DESC_FREE; 2606 2607 status = ldc_mem_alloc_handle(vdc->ldc_handle, 2608 &vdc->local_dring[i].desc_mhdl); 2609 if (status != 0) { 2610 DMSG(vdc, 0, "![%d] Failed to alloc mem handle for" 2611 " descriptor %d", vdc->instance, i); 2612 return (status); 2613 } 2614 vdc->local_dring[i].is_free = B_TRUE; 2615 vdc->local_dring[i].dep = dep; 2616 } 2617 2618 /* Initialize the starting index */ 2619 vdc->dring_curr_idx = 0; 2620 2621 return (status); 2622 } 2623 2624 /* 2625 * Function: 2626 * vdc_destroy_descriptor_ring() 2627 * 2628 * Description: 2629 * 2630 * Arguments: 2631 * vdc - soft state pointer for this instance of the device driver. 
2632 * 2633 * Return Code: 2634 * None 2635 */ 2636 static void 2637 vdc_destroy_descriptor_ring(vdc_t *vdc) 2638 { 2639 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 2640 ldc_mem_handle_t mhdl = NULL; 2641 ldc_mem_info_t minfo; 2642 int status = -1; 2643 int i; /* loop */ 2644 2645 ASSERT(vdc != NULL); 2646 ASSERT(mutex_owned(&vdc->lock)); 2647 2648 DMSG(vdc, 0, "[%d] Entered\n", vdc->instance); 2649 2650 if (vdc->initialized & VDC_DRING_ENTRY) { 2651 DMSG(vdc, 0, 2652 "[%d] Removing Local DRing entries\n", vdc->instance); 2653 for (i = 0; i < vdc->dring_len; i++) { 2654 ldep = &vdc->local_dring[i]; 2655 mhdl = ldep->desc_mhdl; 2656 2657 if (mhdl == NULL) 2658 continue; 2659 2660 if ((status = ldc_mem_info(mhdl, &minfo)) != 0) { 2661 DMSG(vdc, 0, 2662 "ldc_mem_info returned an error: %d\n", 2663 status); 2664 2665 /* 2666 * This must mean that the mem handle 2667 * is not valid. Clear it out so that 2668 * no one tries to use it. 2669 */ 2670 ldep->desc_mhdl = NULL; 2671 continue; 2672 } 2673 2674 if (minfo.status == LDC_BOUND) { 2675 (void) ldc_mem_unbind_handle(mhdl); 2676 } 2677 2678 (void) ldc_mem_free_handle(mhdl); 2679 2680 ldep->desc_mhdl = NULL; 2681 } 2682 vdc->initialized &= ~VDC_DRING_ENTRY; 2683 } 2684 2685 if (vdc->initialized & VDC_DRING_LOCAL) { 2686 DMSG(vdc, 0, "[%d] Freeing Local DRing\n", vdc->instance); 2687 kmem_free(vdc->local_dring, 2688 vdc->dring_len * sizeof (vdc_local_desc_t)); 2689 vdc->initialized &= ~VDC_DRING_LOCAL; 2690 } 2691 2692 if (vdc->initialized & VDC_DRING_BOUND) { 2693 DMSG(vdc, 0, "[%d] Unbinding DRing\n", vdc->instance); 2694 status = ldc_mem_dring_unbind(vdc->ldc_dring_hdl); 2695 if (status == 0) { 2696 vdc->initialized &= ~VDC_DRING_BOUND; 2697 } else { 2698 DMSG(vdc, 0, "[%d] Error %d unbinding DRing %lx", 2699 vdc->instance, status, vdc->ldc_dring_hdl); 2700 } 2701 kmem_free(vdc->dring_cookie, sizeof (ldc_mem_cookie_t)); 2702 } 2703 2704 if (vdc->initialized & VDC_DRING_INIT) { 2705 DMSG(vdc, 0, "[%d] Destroying DRing\n", vdc->instance); 2706 status = ldc_mem_dring_destroy(vdc->ldc_dring_hdl); 2707 if (status == 0) { 2708 vdc->ldc_dring_hdl = NULL; 2709 bzero(&vdc->dring_mem_info, sizeof (ldc_mem_info_t)); 2710 vdc->initialized &= ~VDC_DRING_INIT; 2711 } else { 2712 DMSG(vdc, 0, "[%d] Error %d destroying DRing (%lx)", 2713 vdc->instance, status, vdc->ldc_dring_hdl); 2714 } 2715 } 2716 } 2717 2718 /* 2719 * Function: 2720 * vdc_map_to_shared_dring() 2721 * 2722 * Description: 2723 * Copy contents of the local descriptor to the shared 2724 * memory descriptor. 2725 * 2726 * Arguments: 2727 * vdcp - soft state pointer for this instance of the device driver. 
 *	idx	- descriptor ring index
 *
 * Return Code:
 *	0	- Success
 *	EAGAIN	- the request buffer could not be bound to an LDC memory
 *		  handle (propagated from vdc_populate_mem_hdl()).
 */
static int
vdc_map_to_shared_dring(vdc_t *vdcp, int idx)
{
	vdc_local_desc_t *ldep;
	vd_dring_entry_t *dep;
	int rv;

	ldep = &(vdcp->local_dring[idx]);

	/* for now leave in the old pop_mem_hdl stuff */
	if (ldep->nbytes > 0) {
		rv = vdc_populate_mem_hdl(vdcp, ldep);
		if (rv) {
			DMSG(vdcp, 0, "[%d] Cannot populate mem handle\n",
			    vdcp->instance);
			return (rv);
		}
	}

	/*
	 * fill in the data details into the DRing
	 */
	dep = ldep->dep;
	ASSERT(dep != NULL);

	dep->payload.req_id = VDC_GET_NEXT_REQ_ID(vdcp);
	dep->payload.operation = ldep->operation;
	dep->payload.addr = ldep->offset;
	dep->payload.nbytes = ldep->nbytes;
	dep->payload.status = (uint32_t)-1;	/* vds will set valid value */
	dep->payload.slice = ldep->slice;
	dep->hdr.dstate = VIO_DESC_READY;
	dep->hdr.ack = 1;	/* request an ACK for every message */

	return (0);
}

/*
 * Function:
 *	vdc_send_request
 *
 * Description:
 *	This routine writes the data to be transmitted to vds into the
 *	descriptor, notifies vds that the ring has been updated and
 *	then waits for the request to be processed.
 *
 * Arguments:
 *	vdcp	  - the soft state pointer
 *	operation - operation we want vds to perform (VD_OP_XXX)
 *	addr	  - address of data buf to be read/written.
 *	nbytes	  - number of bytes to read/write
 *	slice	  - the disk slice this request is for
 *	offset	  - relative disk offset
 *	cb_type   - type of call - STRATEGY or SYNC
 *	cb_arg	  - parameter to be sent to server (depends on VD_OP_XXX type)
 *			. mode for ioctl(9e)
 *			. LP64 diskaddr_t (block I/O)
 *	dir	  - direction of operation (READ/WRITE/BOTH)
 *
 * Return Codes:
 *	0
 *	EIO
 *	ENXIO
 */
static int
vdc_send_request(vdc_t *vdcp, int operation, caddr_t addr,
    size_t nbytes, int slice, diskaddr_t offset, int cb_type,
    void *cb_arg, vio_desc_direction_t dir)
{
	int	rv = 0;

	ASSERT(vdcp != NULL);
	ASSERT(slice == VD_SLICE_NONE || slice < V_NUMPAR);

	mutex_enter(&vdcp->lock);

	/*
	 * If this is a block read/write operation we update the I/O statistics
	 * to indicate that the request is being put on the waitq to be
	 * serviced.
	 *
	 * We do it here (a common routine for both synchronous and strategy
	 * calls) for performance reasons - we are already holding vdc->lock
	 * so there is no extra locking overhead. We would have to explicitly
	 * grab the 'lock' mutex to update the stats if we were to do this
	 * higher up the stack in vdc_strategy() et. al.
	 */
	if ((operation == VD_OP_BREAD) || (operation == VD_OP_BWRITE)) {
		DTRACE_IO1(start, buf_t *, cb_arg);
		VD_KSTAT_WAITQ_ENTER(vdcp);
	}

	do {
		while (vdcp->state != VDC_STATE_RUNNING) {

			/* return error if detaching */
			if (vdcp->state == VDC_STATE_DETACH) {
				rv = ENXIO;
				goto done;
			}

			/* fail request if connection timeout is reached */
			if (vdcp->ctimeout_reached) {
				rv = EIO;
				goto done;
			}

			/*
			 * If we are panicking and the disk is not ready then
			 * we can't send any request because we can't complete
			 * the handshake now.
			 */
			if (ddi_in_panic()) {
				rv = EIO;
				goto done;
			}

			cv_wait(&vdcp->running_cv, &vdcp->lock);
		}

	} while (vdc_populate_descriptor(vdcp, operation, addr,
	    nbytes, slice, offset, cb_type, cb_arg, dir));

done:
	/*
	 * If this is a block read/write we update the I/O statistics kstat
	 * to indicate that this request has been placed on the queue for
	 * processing (i.e sent to the vDisk server) - iostat(1M) will
	 * report the time waiting for the vDisk server under the %b column.
	 * In the case of an error we simply take it off the wait queue.
	 */
	if ((operation == VD_OP_BREAD) || (operation == VD_OP_BWRITE)) {
		if (rv == 0) {
			VD_KSTAT_WAITQ_TO_RUNQ(vdcp);
			DTRACE_PROBE1(send, buf_t *, cb_arg);
		} else {
			VD_UPDATE_ERR_STATS(vdcp, vd_transerrs);
			VD_KSTAT_WAITQ_EXIT(vdcp);
			DTRACE_IO1(done, buf_t *, cb_arg);
		}
	}

	mutex_exit(&vdcp->lock);

	return (rv);
}


/*
 * Function:
 *	vdc_populate_descriptor
 *
 * Description:
 *	This routine fills in a free entry of the descriptor ring with the
 *	request details, maps in the caller's buffer and notifies vds that
 *	the ring has been updated. It does not wait for the request to be
 *	processed; completion is reported asynchronously through the ring.
 *
 * Arguments:
 *	vdcp	  - the soft state pointer
 *	operation - operation we want vds to perform (VD_OP_XXX)
 *	addr	  - address of data buf to be read/written.
 *	nbytes	  - number of bytes to read/write
 *	slice	  - the disk slice this request is for
 *	offset	  - relative disk offset
 *	cb_type   - type of call - STRATEGY or SYNC
 *	cb_arg	  - parameter to be sent to server (depends on VD_OP_XXX type)
 *			. mode for ioctl(9e)
 *			. LP64 diskaddr_t (block I/O)
 *	dir	  - direction of operation (READ/WRITE/BOTH)
 *
 * Return Codes:
 *	0
 *	EAGAIN
 *	ECONNRESET
 *	ENXIO
 */
static int
vdc_populate_descriptor(vdc_t *vdcp, int operation, caddr_t addr,
    size_t nbytes, int slice, diskaddr_t offset, int cb_type,
    void *cb_arg, vio_desc_direction_t dir)
{
	vdc_local_desc_t	*local_dep = NULL; /* Local Dring Pointer */
	int			idx;	/* Index of DRing entry used */
	int			next_idx;
	vio_dring_msg_t		dmsg;
	size_t			msglen;
	int			rv;

	ASSERT(MUTEX_HELD(&vdcp->lock));
	vdcp->threads_pending++;
loop:
	DMSG(vdcp, 2, ": dring_curr_idx = %d\n", vdcp->dring_curr_idx);

	/* Get next available D-Ring entry */
	idx = vdcp->dring_curr_idx;
	local_dep = &(vdcp->local_dring[idx]);

	if (!local_dep->is_free) {
		DMSG(vdcp, 2, "[%d]: dring full - waiting for space\n",
		    vdcp->instance);
		cv_wait(&vdcp->dring_free_cv, &vdcp->lock);
		if (vdcp->state == VDC_STATE_RUNNING ||
		    vdcp->state == VDC_STATE_HANDLE_PENDING) {
			goto loop;
		}
		vdcp->threads_pending--;
		return (ECONNRESET);
	}

	next_idx = idx + 1;
	if (next_idx >= vdcp->dring_len)
		next_idx = 0;
	vdcp->dring_curr_idx = next_idx;

	ASSERT(local_dep->is_free);

	local_dep->operation = operation;
	local_dep->addr = addr;
	local_dep->nbytes = nbytes;
	local_dep->slice = slice;
	local_dep->offset = offset;
	local_dep->cb_type = cb_type;
	local_dep->cb_arg = cb_arg;
	local_dep->dir = dir;

	local_dep->is_free = B_FALSE;

	rv = vdc_map_to_shared_dring(vdcp, idx);
	if (rv) {
		DMSG(vdcp, 0, "[%d]: cannot bind memory - waiting ..\n",
		    vdcp->instance);
		/* free the descriptor */
		local_dep->is_free = B_TRUE;
		vdcp->dring_curr_idx = idx;
		cv_wait(&vdcp->membind_cv, &vdcp->lock);
		if (vdcp->state == VDC_STATE_RUNNING ||
		    vdcp->state == VDC_STATE_HANDLE_PENDING) {
			goto loop;
		}
		vdcp->threads_pending--;
		return (ECONNRESET);
	}

	/*
	 * Send a msg with the DRing details to vds
	 */
	VIO_INIT_DRING_DATA_TAG(dmsg);
	VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdcp);
	dmsg.dring_ident = vdcp->dring_ident;
	dmsg.start_idx = idx;
	dmsg.end_idx = idx;
	vdcp->seq_num++;

	DTRACE_PROBE2(populate, int, vdcp->instance,
	    vdc_local_desc_t *, local_dep);
	DMSG(vdcp, 2, "ident=0x%lx, st=%u, end=%u, seq=%ld\n",
	    vdcp->dring_ident, dmsg.start_idx, dmsg.end_idx, dmsg.seq_num);

	/*
	 * note we're still holding the lock here to
	 * make sure the message goes out in order !!!...
	 */
	msglen = sizeof (dmsg);
	rv = vdc_send(vdcp, (caddr_t)&dmsg, &msglen);
	switch (rv) {
	case ECONNRESET:
		/*
		 * vdc_send initiates the reset on failure.
		 * Since the transaction has already been put
		 * on the local dring, it will automatically get
		 * retried when the channel is reset. Given that,
		 * it is ok to just return success even though the
		 * send failed.
		 */
		rv = 0;
		break;

	case 0: /* EOK */
		DMSG(vdcp, 1, "sent via LDC: rv=%d\n", rv);
		break;

	default:
		goto cleanup_and_exit;
	}

	vdcp->threads_pending--;
	return (rv);

cleanup_and_exit:
	DMSG(vdcp, 0, "unexpected error, rv=%d\n", rv);
	return (ENXIO);
}

/*
 * Function:
 *	vdc_do_sync_op
 *
 * Description:
 *	Wrapper around vdc_send_request() that blocks until the
 *	response to the message is available.
 *
 * Arguments:
 *	vdcp	  - the soft state pointer
 *	operation - operation we want vds to perform (VD_OP_XXX)
 *	addr	  - address of data buf to be read/written.
 *	nbytes	  - number of bytes to read/write
 *	slice	  - the disk slice this request is for
 *	offset	  - relative disk offset
 *	cb_type   - type of call - STRATEGY or SYNC
 *	cb_arg	  - parameter to be sent to server (depends on VD_OP_XXX type)
 *			. mode for ioctl(9e)
 *			. LP64 diskaddr_t (block I/O)
 *	dir	  - direction of operation (READ/WRITE/BOTH)
 *	rconflict - check for reservation conflict in case of failure
 *
 * rconflict should be set to B_TRUE by most callers. Callers invoking the
 * VD_OP_SCSICMD operation can set rconflict to B_FALSE if they check the
 * result of a successful operation with vd_scsi_status().
 *
 * Return Codes:
 *	0
 *	EAGAIN
 *	EFAULT
 *	ENXIO
 *	EIO
 */
static int
vdc_do_sync_op(vdc_t *vdcp, int operation, caddr_t addr, size_t nbytes,
    int slice, diskaddr_t offset, int cb_type, void *cb_arg,
    vio_desc_direction_t dir, boolean_t rconflict)
{
	int status;
	vdc_io_t *vio;
	boolean_t check_resv_conflict = B_FALSE;

	ASSERT(cb_type == CB_SYNC);

	/*
	 * Grab the lock, if blocked wait until the server
	 * response causes us to wake up again.
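	 *
	 * Synchronous operations are strictly serialised: sync_op_blocked
	 * admits one caller at a time and sync_blocked_cv hands the slot
	 * to the next waiter. An illustrative caller sketch (hypothetical
	 * argument values; 'mode' stands for an ioctl(9E) mode flag):
	 *
	 *	rv = vdc_do_sync_op(vdc, VD_OP_FLUSH, NULL, 0, 0, 0,
	 *	    CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir, B_TRUE);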
	 */
	mutex_enter(&vdcp->lock);
	vdcp->sync_op_cnt++;
	while (vdcp->sync_op_blocked && vdcp->state != VDC_STATE_DETACH)
		cv_wait(&vdcp->sync_blocked_cv, &vdcp->lock);

	if (vdcp->state == VDC_STATE_DETACH) {
		cv_broadcast(&vdcp->sync_blocked_cv);
		vdcp->sync_op_cnt--;
		mutex_exit(&vdcp->lock);
		return (ENXIO);
	}

	/* now block any other thread entering after us */
	vdcp->sync_op_blocked = B_TRUE;
	vdcp->sync_op_pending = B_TRUE;
	mutex_exit(&vdcp->lock);

	status = vdc_send_request(vdcp, operation, addr,
	    nbytes, slice, offset, cb_type, cb_arg, dir);

	mutex_enter(&vdcp->lock);

	if (status != 0) {
		vdcp->sync_op_pending = B_FALSE;
	} else {
		/*
		 * Block until our transaction completes.
		 * Anyone else waiting also gets to go next.
		 */
		while (vdcp->sync_op_pending && vdcp->state != VDC_STATE_DETACH)
			cv_wait(&vdcp->sync_pending_cv, &vdcp->lock);

		DMSG(vdcp, 2, ": operation returned %d\n",
		    vdcp->sync_op_status);
		if (vdcp->state == VDC_STATE_DETACH) {
			vdcp->sync_op_pending = B_FALSE;
			status = ENXIO;
		} else {
			status = vdcp->sync_op_status;
			if (status != 0 && vdcp->failfast_interval != 0) {
				/*
				 * Operation has failed and failfast is enabled.
				 * We need to check if the failure is due to a
				 * reservation conflict if this was requested.
				 */
				check_resv_conflict = rconflict;
			}

		}
	}

	vdcp->sync_op_status = 0;
	vdcp->sync_op_blocked = B_FALSE;
	vdcp->sync_op_cnt--;

	/* signal the next waiting thread */
	cv_signal(&vdcp->sync_blocked_cv);

	/*
	 * We have to check for reservation conflict after unblocking sync
	 * operations because some sync operations will be used to do this
	 * check.
	 */
	if (check_resv_conflict) {
		vio = vdc_failfast_io_queue(vdcp, NULL);
		while (vio->vio_qtime != 0)
			cv_wait(&vdcp->failfast_io_cv, &vdcp->lock);
		kmem_free(vio, sizeof (vdc_io_t));
	}

	mutex_exit(&vdcp->lock);

	return (status);
}


/*
 * Function:
 *	vdc_drain_response()
 *
 * Description:
 *	When a guest is panicking, the completion of requests needs to be
 *	handled differently because interrupts are disabled and vdc
 *	will not get messages. We have to poll for the messages instead.
 *
 *	Note: since we don't have a buf_t available we cannot implement
 *	the io:::done DTrace probe in this specific case.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_drain_response(vdc_t *vdc)
{
	int	rv, idx, retries;
	size_t	msglen;
	vdc_local_desc_t *ldep = NULL;	/* Local Dring Entry Pointer */
	vio_dring_msg_t dmsg;

	mutex_enter(&vdc->lock);

	retries = 0;
	for (;;) {
		msglen = sizeof (dmsg);
		rv = ldc_read(vdc->ldc_handle, (caddr_t)&dmsg, &msglen);
		if (rv) {
			rv = EINVAL;
			break;
		}

		/*
		 * if there are no packets wait and check again
		 */
		if ((rv == 0) && (msglen == 0)) {
			if (retries++ > vdc_dump_retries) {
				rv = EAGAIN;
				break;
			}

			drv_usecwait(vdc_usec_timeout_dump);
			continue;
		}

		/*
		 * Ignore all messages that are not ACKs/NACKs to
		 * DRing requests.
		 */
		if ((dmsg.tag.vio_msgtype != VIO_TYPE_DATA) ||
		    (dmsg.tag.vio_subtype_env != VIO_DRING_DATA)) {
			DMSG(vdc, 0, "discard pkt: type=%d sub=%d env=%d\n",
			    dmsg.tag.vio_msgtype,
			    dmsg.tag.vio_subtype,
			    dmsg.tag.vio_subtype_env);
			continue;
		}

		/*
		 * set the appropriate return value for the current request.
		 */
		switch (dmsg.tag.vio_subtype) {
		case VIO_SUBTYPE_ACK:
			rv = 0;
			break;
		case VIO_SUBTYPE_NACK:
			rv = EAGAIN;
			break;
		default:
			continue;
		}

		idx = dmsg.start_idx;
		if (idx >= vdc->dring_len) {
			DMSG(vdc, 0, "[%d] Bogus ack data : start %d\n",
			    vdc->instance, idx);
			continue;
		}
		ldep = &vdc->local_dring[idx];
		if (ldep->dep->hdr.dstate != VIO_DESC_DONE) {
			DMSG(vdc, 0, "[%d] Entry @ %d - state !DONE %d\n",
			    vdc->instance, idx, ldep->dep->hdr.dstate);
			continue;
		}

		DMSG(vdc, 1, "[%d] Depopulating idx=%d state=%d\n",
		    vdc->instance, idx, ldep->dep->hdr.dstate);

		rv = vdc_depopulate_descriptor(vdc, idx);
		if (rv) {
			DMSG(vdc, 0,
			    "[%d] Entry @ %d - depopulate failed ..\n",
			    vdc->instance, idx);
		}

		/* if this is the last descriptor - break out of loop */
		if ((idx + 1) % vdc->dring_len == vdc->dring_curr_idx)
			break;
	}

	mutex_exit(&vdc->lock);
	DMSG(vdc, 0, "End idx=%d\n", idx);

	return (rv);
}


/*
 * Function:
 *	vdc_depopulate_descriptor()
 *
 * Description:
 *	Mark a descriptor ring entry as free once the vDisk server has
 *	acknowledged it, unbind the entry's LDC memory handle and return
 *	the completion status of the request.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *	idx	- Index of the Descriptor Ring entry being modified
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx)
{
	vd_dring_entry_t *dep = NULL;	/* Dring Entry Pointer */
	vdc_local_desc_t *ldep = NULL;	/* Local Dring Entry Pointer */
	int	status = ENXIO;
	int	rv = 0;

	ASSERT(vdc != NULL);
	ASSERT(idx < vdc->dring_len);
	ldep = &vdc->local_dring[idx];
	ASSERT(ldep != NULL);
	ASSERT(MUTEX_HELD(&vdc->lock));

	DTRACE_PROBE2(depopulate, int, vdc->instance,
	    vdc_local_desc_t *, ldep);
	DMSG(vdc, 2, ": idx = %d\n", idx);

	dep = ldep->dep;
	ASSERT(dep != NULL);
	ASSERT((dep->hdr.dstate == VIO_DESC_DONE) ||
	    (dep->payload.status == ECANCELED));

	VDC_MARK_DRING_ENTRY_FREE(vdc, idx);

	ldep->is_free = B_TRUE;
	status = dep->payload.status;
	DMSG(vdc, 2, ": is_free = %d : status = %d\n", ldep->is_free, status);

	/*
	 * If no buffers were used to transfer information to the server when
	 * populating the descriptor then no memory handles need to be unbound
	 * and we can return now.
	 */
	if (ldep->nbytes == 0) {
		cv_signal(&vdc->dring_free_cv);
		return (status);
	}

	/*
	 * If the upper layer passed in a misaligned address we copied the
	 * data into an aligned buffer before sending it to LDC - we now
	 * copy it back to the original buffer.
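	 *
	 * For example (illustrative), a read into a buffer starting at an
	 * odd address was staged through the 8-byte aligned bounce buffer
	 * allocated by vdc_populate_mem_hdl(); only the payload.nbytes the
	 * server actually transferred are copied back before the bounce
	 * buffer is freed.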
	 */
	if (ldep->align_addr) {
		ASSERT(ldep->addr != NULL);

		if (dep->payload.nbytes > 0)
			bcopy(ldep->align_addr, ldep->addr,
			    dep->payload.nbytes);
		kmem_free(ldep->align_addr,
		    sizeof (caddr_t) * P2ROUNDUP(ldep->nbytes, 8));
		ldep->align_addr = NULL;
	}

	rv = ldc_mem_unbind_handle(ldep->desc_mhdl);
	if (rv != 0) {
		DMSG(vdc, 0, "?[%d] unbind mhdl 0x%lx @ idx %d failed (%d)",
		    vdc->instance, ldep->desc_mhdl, idx, rv);
		/*
		 * The error returned by the vDisk server is more informative
		 * and thus has a higher priority but if it isn't set we ensure
		 * that this function returns an error.
		 */
		if (status == 0)
			status = EINVAL;
	}

	cv_signal(&vdc->membind_cv);
	cv_signal(&vdc->dring_free_cv);

	return (status);
}

/*
 * Function:
 *	vdc_populate_mem_hdl()
 *
 * Description:
 *	Bind the buffer described by a local descriptor ring entry to an
 *	LDC memory handle so that the vDisk server can map it in. If the
 *	buffer is not 8-byte aligned it is first copied into an aligned
 *	bounce buffer, which is bound instead.
 *
 * Arguments:
 *	vdcp	- soft state pointer for this instance of the device driver.
 *	ldep	- local descriptor ring entry describing the buffer to be
 *		  mapped in (address, nbytes, direction, etc.)
 *
 * Return Code:
 *	0	- Success
 *	EAGAIN	- the memory could not be bound; the caller may retry
 */
static int
vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep)
{
	vd_dring_entry_t	*dep = NULL;
	ldc_mem_handle_t	mhdl;
	caddr_t			vaddr;
	size_t			nbytes;
	uint8_t			perm = LDC_MEM_RW;
	uint8_t			maptype;
	int			rv = 0;
	int			i;

	ASSERT(vdcp != NULL);

	dep = ldep->dep;
	mhdl = ldep->desc_mhdl;

	switch (ldep->dir) {
	case VIO_read_dir:
		perm = LDC_MEM_W;
		break;

	case VIO_write_dir:
		perm = LDC_MEM_R;
		break;

	case VIO_both_dir:
		perm = LDC_MEM_RW;
		break;

	default:
		ASSERT(0);	/* catch bad programming in vdc */
	}

	/*
	 * LDC expects any addresses passed in to be 8-byte aligned. We need
	 * to copy the contents of any misaligned buffers to a newly allocated
	 * buffer and bind it instead (and copy the contents back to the
	 * original buffer passed in when depopulating the descriptor)
	 */
	vaddr = ldep->addr;
	nbytes = ldep->nbytes;
	if (((uint64_t)vaddr & 0x7) != 0) {
		ASSERT(ldep->align_addr == NULL);
		ldep->align_addr =
		    kmem_alloc(sizeof (caddr_t) *
		    P2ROUNDUP(nbytes, 8), KM_SLEEP);
		DMSG(vdcp, 0, "[%d] Misaligned address %p reallocating "
		    "(buf=%p nb=%ld op=%d)\n",
		    vdcp->instance, (void *)vaddr, (void *)ldep->align_addr,
		    nbytes, ldep->operation);
		if (perm != LDC_MEM_W)
			bcopy(vaddr, ldep->align_addr, nbytes);
		vaddr = ldep->align_addr;
	}

	maptype = LDC_IO_MAP|LDC_SHADOW_MAP|LDC_DIRECT_MAP;
	rv = ldc_mem_bind_handle(mhdl, vaddr, P2ROUNDUP(nbytes, 8),
	    maptype, perm, &dep->payload.cookie[0], &dep->payload.ncookies);
	DMSG(vdcp, 2, "[%d] bound mem handle; ncookies=%d\n",
	    vdcp->instance, dep->payload.ncookies);
	if (rv != 0) {
		DMSG(vdcp, 0, "[%d] Failed to bind LDC memory handle "
		    "(mhdl=%p, buf=%p, err=%d)\n",
		    vdcp->instance, (void *)mhdl, (void *)vaddr, rv);
		if (ldep->align_addr) {
			kmem_free(ldep->align_addr,
			    sizeof (caddr_t) * P2ROUNDUP(nbytes, 8));
			ldep->align_addr = NULL;
		}
		return (EAGAIN);
	}

	/*
	 * Get the other cookies (if any).
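	 *
	 * ldc_mem_bind_handle() returned the first cookie and the total
	 * cookie count in the payload; the remaining cookies (typically
	 * one per additional page spanned by the buffer) are fetched here
	 * one at a time with ldc_mem_nextcookie().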
	 */
	for (i = 1; i < dep->payload.ncookies; i++) {
		rv = ldc_mem_nextcookie(mhdl, &dep->payload.cookie[i]);
		if (rv != 0) {
			(void) ldc_mem_unbind_handle(mhdl);
			DMSG(vdcp, 0, "?[%d] Failed to get next cookie "
			    "(mhdl=%lx cnum=%d), err=%d",
			    vdcp->instance, mhdl, i, rv);
			if (ldep->align_addr) {
				kmem_free(ldep->align_addr,
				    sizeof (caddr_t) *
				    P2ROUNDUP(ldep->nbytes, 8));
				ldep->align_addr = NULL;
			}
			return (EAGAIN);
		}
	}

	return (rv);
}

/*
 * Interrupt handlers for messages from LDC
 */

/*
 * Function:
 *	vdc_handle_cb()
 *
 * Description:
 *	LDC event callback for this vdc instance. Depending on the event
 *	(LDC_EVT_UP, LDC_EVT_READ, LDC_EVT_RESET/DOWN) we update the
 *	connection state, signal the reader, or initiate a reset.
 *
 * Arguments:
 *	event	- Type of event (LDC_EVT_xxx) that triggered the callback
 *	arg	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static uint_t
vdc_handle_cb(uint64_t event, caddr_t arg)
{
	ldc_status_t ldc_state;
	int rv = 0;

	vdc_t	*vdc = (vdc_t *)(void *)arg;

	ASSERT(vdc != NULL);

	DMSG(vdc, 1, "evt=%lx seqID=%ld\n", event, vdc->seq_num);

	/*
	 * Depending on the type of event that triggered this callback,
	 * we modify the handshake state or read the data.
	 *
	 * NOTE: not done as a switch() as event could be triggered by
	 * a state change and a read request. Also the ordering of the
	 * check for the event types is deliberate.
	 */
	if (event & LDC_EVT_UP) {
		DMSG(vdc, 0, "[%d] Received LDC_EVT_UP\n", vdc->instance);

		mutex_enter(&vdc->lock);

		/* get LDC state */
		rv = ldc_status(vdc->ldc_handle, &ldc_state);
		if (rv != 0) {
			DMSG(vdc, 0, "[%d] Couldn't get LDC status %d",
			    vdc->instance, rv);
			mutex_exit(&vdc->lock);
			return (LDC_SUCCESS);
		}
		if (vdc->ldc_state != LDC_UP && ldc_state == LDC_UP) {
			/*
			 * Reset the transaction sequence numbers when
			 * LDC comes up. We then kick off the handshake
			 * negotiation with the vDisk server.
			 */
			vdc->seq_num = 1;
			vdc->seq_num_reply = 0;
			vdc->ldc_state = ldc_state;
			cv_signal(&vdc->initwait_cv);
		}

		mutex_exit(&vdc->lock);
	}

	if (event & LDC_EVT_READ) {
		DMSG(vdc, 1, "[%d] Received LDC_EVT_READ\n", vdc->instance);
		mutex_enter(&vdc->read_lock);
		cv_signal(&vdc->read_cv);
		vdc->read_state = VDC_READ_PENDING;
		mutex_exit(&vdc->read_lock);

		/* that's all we have to do - no need to handle DOWN/RESET */
		return (LDC_SUCCESS);
	}

	if (event & (LDC_EVT_RESET|LDC_EVT_DOWN)) {

		DMSG(vdc, 0, "[%d] Received LDC RESET event\n", vdc->instance);

		mutex_enter(&vdc->lock);
		/*
		 * Need to wake up any readers so they will
		 * detect that a reset has occurred.
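		 *
		 * Moving read_state to VDC_READ_RESET makes a reader
		 * blocked in vdc_recv() wake up and fail with ECONNRESET,
		 * which in turn drives the message processing thread into
		 * its reset path.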
		 */
		mutex_enter(&vdc->read_lock);
		if ((vdc->read_state == VDC_READ_WAITING) ||
		    (vdc->read_state == VDC_READ_RESET))
			cv_signal(&vdc->read_cv);
		vdc->read_state = VDC_READ_RESET;
		mutex_exit(&vdc->read_lock);

		/* wake up any threads waiting for connection to come up */
		if (vdc->state == VDC_STATE_INIT_WAITING) {
			vdc->state = VDC_STATE_RESETTING;
			cv_signal(&vdc->initwait_cv);
		}

		mutex_exit(&vdc->lock);
	}

	if (event & ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ))
		DMSG(vdc, 0, "![%d] Unexpected LDC event (%lx) received",
		    vdc->instance, event);

	return (LDC_SUCCESS);
}

/*
 * Function:
 *	vdc_wait_for_response()
 *
 * Description:
 *	Block waiting for a response from the server. If there is
 *	no data, the thread blocks on the read_cv, which is signalled
 *	by the callback when an EVT_READ occurs.
 *
 * Arguments:
 *	vdcp	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp)
{
	size_t		nbytes = sizeof (*msgp);
	int		status;

	ASSERT(vdcp != NULL);

	DMSG(vdcp, 1, "[%d] Entered\n", vdcp->instance);

	status = vdc_recv(vdcp, msgp, &nbytes);
	DMSG(vdcp, 3, "vdc_read() done.. status=0x%x size=0x%x\n",
	    status, (int)nbytes);
	if (status) {
		DMSG(vdcp, 0, "?[%d] Error %d reading LDC msg\n",
		    vdcp->instance, status);
		return (status);
	}

	if (nbytes < sizeof (vio_msg_tag_t)) {
		DMSG(vdcp, 0, "?[%d] Expect %lu bytes; recv'd %lu\n",
		    vdcp->instance, sizeof (vio_msg_tag_t), nbytes);
		return (ENOMSG);
	}

	DMSG(vdcp, 2, "[%d] (%x/%x/%x)\n", vdcp->instance,
	    msgp->tag.vio_msgtype,
	    msgp->tag.vio_subtype,
	    msgp->tag.vio_subtype_env);

	/*
	 * Verify the Session ID of the message
	 *
	 * Every message after the Version has been negotiated should
	 * have the correct session ID set.
	 */
	if ((msgp->tag.vio_sid != vdcp->session_id) &&
	    (msgp->tag.vio_subtype_env != VIO_VER_INFO)) {
		DMSG(vdcp, 0, "[%d] Invalid SID: received 0x%x, "
		    "expected 0x%lx [seq num %lx @ %d]",
		    vdcp->instance, msgp->tag.vio_sid,
		    vdcp->session_id,
		    ((vio_dring_msg_t *)msgp)->seq_num,
		    ((vio_dring_msg_t *)msgp)->start_idx);
		return (ENOMSG);
	}
	return (0);
}


/*
 * Function:
 *	vdc_resubmit_backup_dring()
 *
 * Description:
 *	Resubmit each descriptor in the backed up dring to
 *	the vDisk server. The DRing was backed up during connection
 *	reset.
 *
 * Arguments:
 *	vdcp	- soft state pointer for this instance of the device driver.
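 *
 *	Note that entries are replayed in ring order starting from the
 *	backup tail, and each one is resubmitted synchronously (sent, then
 *	its response is waited for) so that requests reach the server in
 *	their original order.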
3641 * 3642 * Return Code: 3643 * 0 - Success 3644 */ 3645 static int 3646 vdc_resubmit_backup_dring(vdc_t *vdcp) 3647 { 3648 int processed = 0; 3649 int count; 3650 int b_idx; 3651 int rv = 0; 3652 int dring_size; 3653 int op; 3654 vio_msg_t vio_msg; 3655 vdc_local_desc_t *curr_ldep; 3656 3657 ASSERT(MUTEX_NOT_HELD(&vdcp->lock)); 3658 ASSERT(vdcp->state == VDC_STATE_HANDLE_PENDING); 3659 3660 if (vdcp->local_dring_backup == NULL) { 3661 /* the pending requests have already been processed */ 3662 return (0); 3663 } 3664 3665 DMSG(vdcp, 1, "restoring pending dring entries (len=%d, tail=%d)\n", 3666 vdcp->local_dring_backup_len, vdcp->local_dring_backup_tail); 3667 3668 /* 3669 * Walk the backup copy of the local descriptor ring and 3670 * resubmit all the outstanding transactions. 3671 */ 3672 b_idx = vdcp->local_dring_backup_tail; 3673 for (count = 0; count < vdcp->local_dring_backup_len; count++) { 3674 3675 curr_ldep = &(vdcp->local_dring_backup[b_idx]); 3676 3677 /* only resubmit outstanding transactions */ 3678 if (!curr_ldep->is_free) { 3679 /* 3680 * If we are retrying a block read/write operation we 3681 * need to update the I/O statistics to indicate that 3682 * the request is being put back on the waitq to be 3683 * serviced (it will have been taken off after the 3684 * error was reported). 3685 */ 3686 mutex_enter(&vdcp->lock); 3687 op = curr_ldep->operation; 3688 if ((op == VD_OP_BREAD) || (op == VD_OP_BWRITE)) { 3689 DTRACE_IO1(start, buf_t *, curr_ldep->cb_arg); 3690 VD_KSTAT_WAITQ_ENTER(vdcp); 3691 } 3692 3693 DMSG(vdcp, 1, "resubmitting entry idx=%x\n", b_idx); 3694 rv = vdc_populate_descriptor(vdcp, op, 3695 curr_ldep->addr, curr_ldep->nbytes, 3696 curr_ldep->slice, curr_ldep->offset, 3697 curr_ldep->cb_type, curr_ldep->cb_arg, 3698 curr_ldep->dir); 3699 3700 if (rv) { 3701 if (op == VD_OP_BREAD || op == VD_OP_BWRITE) { 3702 VD_UPDATE_ERR_STATS(vdcp, vd_transerrs); 3703 VD_KSTAT_WAITQ_EXIT(vdcp); 3704 DTRACE_IO1(done, buf_t *, 3705 curr_ldep->cb_arg); 3706 } 3707 DMSG(vdcp, 1, "[%d] cannot resubmit entry %d\n", 3708 vdcp->instance, b_idx); 3709 mutex_exit(&vdcp->lock); 3710 goto done; 3711 } 3712 3713 /* 3714 * If this is a block read/write we update the I/O 3715 * statistics kstat to indicate that the request 3716 * has been sent back to the vDisk server and should 3717 * now be put on the run queue. 3718 */ 3719 if ((op == VD_OP_BREAD) || (op == VD_OP_BWRITE)) { 3720 DTRACE_PROBE1(send, buf_t *, curr_ldep->cb_arg); 3721 VD_KSTAT_WAITQ_TO_RUNQ(vdcp); 3722 } 3723 mutex_exit(&vdcp->lock); 3724 3725 /* Wait for the response message. */ 3726 DMSG(vdcp, 1, "waiting for response to idx=%x\n", 3727 b_idx); 3728 rv = vdc_wait_for_response(vdcp, &vio_msg); 3729 if (rv) { 3730 /* 3731 * If this is a block read/write we update 3732 * the I/O statistics kstat to take it 3733 * off the run queue. 
3734 */ 3735 mutex_enter(&vdcp->lock); 3736 if (op == VD_OP_BREAD || op == VD_OP_BWRITE) { 3737 VD_UPDATE_ERR_STATS(vdcp, vd_transerrs); 3738 VD_KSTAT_RUNQ_EXIT(vdcp); 3739 DTRACE_IO1(done, buf_t *, 3740 curr_ldep->cb_arg); 3741 } 3742 DMSG(vdcp, 1, "[%d] wait_for_response " 3743 "returned err=%d\n", vdcp->instance, 3744 rv); 3745 mutex_exit(&vdcp->lock); 3746 goto done; 3747 } 3748 3749 DMSG(vdcp, 1, "processing msg for idx=%x\n", b_idx); 3750 rv = vdc_process_data_msg(vdcp, &vio_msg); 3751 if (rv) { 3752 DMSG(vdcp, 1, "[%d] process_data_msg " 3753 "returned err=%d\n", vdcp->instance, 3754 rv); 3755 goto done; 3756 } 3757 processed++; 3758 } 3759 3760 /* get the next element to submit */ 3761 if (++b_idx >= vdcp->local_dring_backup_len) 3762 b_idx = 0; 3763 } 3764 3765 /* all done - now clear up pending dring copy */ 3766 dring_size = vdcp->local_dring_backup_len * 3767 sizeof (vdcp->local_dring_backup[0]); 3768 3769 (void) kmem_free(vdcp->local_dring_backup, dring_size); 3770 3771 vdcp->local_dring_backup = NULL; 3772 3773 done: 3774 DTRACE_PROBE2(processed, int, processed, vdc_t *, vdcp); 3775 3776 return (rv); 3777 } 3778 3779 /* 3780 * Function: 3781 * vdc_cancel_backup_dring 3782 * 3783 * Description: 3784 * Cancel each descriptor in the backed up dring to vDisk server. 3785 * The Dring was backed up during connection reset. 3786 * 3787 * Arguments: 3788 * vdcp - soft state pointer for this instance of the device driver. 3789 * 3790 * Return Code: 3791 * None 3792 */ 3793 void 3794 vdc_cancel_backup_dring(vdc_t *vdcp) 3795 { 3796 vdc_local_desc_t *ldep; 3797 struct buf *bufp; 3798 int count; 3799 int b_idx; 3800 int dring_size; 3801 int cancelled = 0; 3802 3803 ASSERT(MUTEX_HELD(&vdcp->lock)); 3804 ASSERT(vdcp->state == VDC_STATE_INIT || 3805 vdcp->state == VDC_STATE_INIT_WAITING || 3806 vdcp->state == VDC_STATE_NEGOTIATE || 3807 vdcp->state == VDC_STATE_RESETTING); 3808 3809 if (vdcp->local_dring_backup == NULL) { 3810 /* the pending requests have already been processed */ 3811 return; 3812 } 3813 3814 DMSG(vdcp, 1, "cancelling pending dring entries (len=%d, tail=%d)\n", 3815 vdcp->local_dring_backup_len, vdcp->local_dring_backup_tail); 3816 3817 /* 3818 * Walk the backup copy of the local descriptor ring and 3819 * cancel all the outstanding transactions. 3820 */ 3821 b_idx = vdcp->local_dring_backup_tail; 3822 for (count = 0; count < vdcp->local_dring_backup_len; count++) { 3823 3824 ldep = &(vdcp->local_dring_backup[b_idx]); 3825 3826 /* only cancel outstanding transactions */ 3827 if (!ldep->is_free) { 3828 3829 DMSG(vdcp, 1, "cancelling entry idx=%x\n", b_idx); 3830 cancelled++; 3831 3832 /* 3833 * All requests have already been cleared from the 3834 * local descriptor ring and the LDC channel has been 3835 * reset so we will never get any reply for these 3836 * requests. Now we just have to notify threads waiting 3837 * for replies that the request has failed. 
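			 *
			 * CB_SYNC waiters are failed by setting
			 * sync_op_status to EIO and signalling
			 * sync_pending_cv; CB_STRATEGY buffers are
			 * failed with bioerror(EIO)/biodone() below.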
			 */
			switch (ldep->cb_type) {
			case CB_SYNC:
				ASSERT(vdcp->sync_op_pending);
				vdcp->sync_op_status = EIO;
				vdcp->sync_op_pending = B_FALSE;
				cv_signal(&vdcp->sync_pending_cv);
				break;

			case CB_STRATEGY:
				bufp = ldep->cb_arg;
				ASSERT(bufp != NULL);
				bufp->b_resid = bufp->b_bcount;
				VD_UPDATE_ERR_STATS(vdcp, vd_softerrs);
				VD_KSTAT_RUNQ_EXIT(vdcp);
				DTRACE_IO1(done, buf_t *, bufp);
				bioerror(bufp, EIO);
				biodone(bufp);
				break;

			default:
				ASSERT(0);
			}

		}

		/* get the next element to cancel */
		if (++b_idx >= vdcp->local_dring_backup_len)
			b_idx = 0;
	}

	/* all done - now clear up pending dring copy */
	dring_size = vdcp->local_dring_backup_len *
	    sizeof (vdcp->local_dring_backup[0]);

	(void) kmem_free(vdcp->local_dring_backup, dring_size);

	vdcp->local_dring_backup = NULL;

	DTRACE_PROBE2(cancelled, int, cancelled, vdc_t *, vdcp);
}

/*
 * Function:
 *	vdc_connection_timeout
 *
 * Description:
 *	This function is invoked if the timeout set to establish the connection
 *	with vds expires. This will happen if we spend too much time in the
 *	VDC_STATE_INIT_WAITING or VDC_STATE_NEGOTIATE states; in that case
 *	we cancel any pending requests and mark them as failed.
 *
 *	If the timeout does not expire, it will be cancelled when we reach the
 *	VDC_STATE_HANDLE_PENDING or VDC_STATE_RESETTING state. This function can
 *	be invoked while we are in the VDC_STATE_HANDLE_PENDING or
 *	VDC_STATE_RESETTING state in which case we do nothing because the
 *	timeout is being cancelled.
 *
 * Arguments:
 *	arg	- argument of the timeout function, actually a soft state
 *		  pointer for the instance of the device driver.
 *
 * Return Code:
 *	None
 */
void
vdc_connection_timeout(void *arg)
{
	vdc_t	*vdcp = (vdc_t *)arg;

	mutex_enter(&vdcp->lock);

	if (vdcp->state == VDC_STATE_HANDLE_PENDING ||
	    vdcp->state == VDC_STATE_DETACH) {
		/*
		 * The connection has just been re-established or
		 * we are detaching.
		 */
		vdcp->ctimeout_reached = B_FALSE;
		mutex_exit(&vdcp->lock);
		return;
	}

	vdcp->ctimeout_reached = B_TRUE;

	/* notify requests waiting for sending */
	cv_broadcast(&vdcp->running_cv);

	/* cancel requests waiting for a result */
	vdc_cancel_backup_dring(vdcp);

	mutex_exit(&vdcp->lock);

	cmn_err(CE_NOTE, "[%d] connection to service domain timeout",
	    vdcp->instance);
}

/*
 * Function:
 *	vdc_backup_local_dring()
 *
 * Description:
 *	Back up the current dring in the event of a reset. The DRing
 *	transactions will be resubmitted to the server when the
 *	connection is restored.
 *
 * Arguments:
 *	vdcp	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	NONE
 */
static void
vdc_backup_local_dring(vdc_t *vdcp)
{
	int dring_size;

	ASSERT(MUTEX_HELD(&vdcp->lock));
	ASSERT(vdcp->state == VDC_STATE_RESETTING);

	/*
	 * If the backup dring is still around, it means
	 * that the last restore did not complete. However,
	 * since we never got back into the running state,
	 * the backup copy we have is still valid.
	 */
	if (vdcp->local_dring_backup != NULL) {
		DMSG(vdcp, 1, "reusing local descriptor ring backup "
		    "(len=%d, tail=%d)\n", vdcp->local_dring_backup_len,
		    vdcp->local_dring_backup_tail);
		return;
	}

	/*
	 * The backup dring can be NULL and the local dring may not be
	 * initialized. This can happen if we had a reset while establishing
	 * a new connection but after the connection has timed out. In that
	 * case the backup dring is NULL because the pending requests were
	 * cancelled before the local dring was ever initialized.
	 */
	if (!(vdcp->initialized & VDC_DRING_LOCAL))
		return;

	DMSG(vdcp, 1, "backing up the local descriptor ring (len=%d, "
	    "tail=%d)\n", vdcp->dring_len, vdcp->dring_curr_idx);

	dring_size = vdcp->dring_len * sizeof (vdcp->local_dring[0]);

	vdcp->local_dring_backup = kmem_alloc(dring_size, KM_SLEEP);
	bcopy(vdcp->local_dring, vdcp->local_dring_backup, dring_size);

	vdcp->local_dring_backup_tail = vdcp->dring_curr_idx;
	vdcp->local_dring_backup_len = vdcp->dring_len;
}

/* -------------------------------------------------------------------------- */

/*
 * The following functions process the incoming messages from vds
 */

/*
 * Function:
 *	vdc_process_msg_thread()
 *
 * Description:
 *
 *	Main VDC message processing thread. Each vDisk instance
 *	runs a copy of this thread. This thread triggers
 *	all the handshakes and data exchange with the server. It
 *	also handles all channel resets.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	None
 */
static void
vdc_process_msg_thread(vdc_t *vdcp)
{
	int	status;
	int	ctimeout;
	timeout_id_t tmid = 0;

	mutex_enter(&vdcp->lock);

	for (;;) {

#define	Q(_s)	(vdcp->state == _s) ? #_s :
		DMSG(vdcp, 3, "state = %d (%s)\n", vdcp->state,
		    Q(VDC_STATE_INIT)
		    Q(VDC_STATE_INIT_WAITING)
		    Q(VDC_STATE_NEGOTIATE)
		    Q(VDC_STATE_HANDLE_PENDING)
		    Q(VDC_STATE_RUNNING)
		    Q(VDC_STATE_RESETTING)
		    Q(VDC_STATE_DETACH)
		    "UNKNOWN");

		switch (vdcp->state) {
		case VDC_STATE_INIT:

			/*
			 * If requested, start a timeout to check if the
			 * connection with vds is established in the
			 * specified delay. If the timeout expires, we
			 * will cancel any pending request.
			 *
			 * If a reset has occurred while establishing
			 * the connection, we already have a timeout armed
			 * and in that case we don't need to arm a new one.
			 */
			ctimeout = (vdc_timeout != 0)?
4053 vdc_timeout : vdcp->ctimeout;
4054
4055 if (ctimeout != 0 && tmid == 0) {
4056 tmid = timeout(vdc_connection_timeout, vdcp,
4057 ctimeout * drv_usectohz(1000000));
4058 }
4059
4060 /* Check if we have been re-initializing repeatedly */
4061 if (vdcp->hshake_cnt++ > vdc_hshake_retries &&
4062 vdcp->lifecycle != VDC_LC_ONLINE) {
4063 cmn_err(CE_NOTE, "[%d] disk access failed.\n",
4064 vdcp->instance);
4065 vdcp->state = VDC_STATE_DETACH;
4066 break;
4067 }
4068
4069 /* Bring up connection with vds via LDC */
4070 status = vdc_start_ldc_connection(vdcp);
4071 if (status == EINVAL) {
4072 DMSG(vdcp, 0, "[%d] Could not start LDC",
4073 vdcp->instance);
4074 vdcp->state = VDC_STATE_DETACH;
4075 } else {
4076 vdcp->state = VDC_STATE_INIT_WAITING;
4077 }
4078 break;
4079
4080 case VDC_STATE_INIT_WAITING:
4081
4082 /*
4083 * Let the callback event move us on
4084 * when the channel to the server is up.
4085 */
4086 while (vdcp->ldc_state != LDC_UP) {
4087 cv_wait(&vdcp->initwait_cv, &vdcp->lock);
4088 if (vdcp->state != VDC_STATE_INIT_WAITING) {
4089 DMSG(vdcp, 0,
4090 "state moved to %d out from under us...\n",
4091 vdcp->state);
4092
4093 break;
4094 }
4095 }
4096 if (vdcp->state == VDC_STATE_INIT_WAITING &&
4097 vdcp->ldc_state == LDC_UP) {
4098 vdcp->state = VDC_STATE_NEGOTIATE;
4099 }
4100 break;
4101
4102 case VDC_STATE_NEGOTIATE:
4103 switch (status = vdc_ver_negotiation(vdcp)) {
4104 case 0:
4105 break;
4106 default:
4107 DMSG(vdcp, 0, "ver negotiate failed (%d)..\n",
4108 status);
4109 goto reset;
4110 }
4111
4112 switch (status = vdc_attr_negotiation(vdcp)) {
4113 case 0:
4114 break;
4115 default:
4116 DMSG(vdcp, 0, "attr negotiate failed (%d)..\n",
4117 status);
4118 goto reset;
4119 }
4120
4121 switch (status = vdc_dring_negotiation(vdcp)) {
4122 case 0:
4123 break;
4124 default:
4125 DMSG(vdcp, 0, "dring negotiate failed (%d)..\n",
4126 status);
4127 goto reset;
4128 }
4129
4130 switch (status = vdc_rdx_exchange(vdcp)) {
4131 case 0:
4132 vdcp->state = VDC_STATE_HANDLE_PENDING;
4133 goto done;
4134 default:
4135 DMSG(vdcp, 0, "RDX xchg failed ..(%d)\n",
4136 status);
4137 goto reset;
4138 }
4139 reset:
4140 DMSG(vdcp, 0, "negotiation failed: resetting (%d)\n",
4141 status);
4142 vdcp->state = VDC_STATE_RESETTING;
4143 vdcp->self_reset = B_TRUE;
4144 done:
4145 DMSG(vdcp, 0, "negotiation complete (state=0x%x)...\n",
4146 vdcp->state);
4147 break;
4148
4149 case VDC_STATE_HANDLE_PENDING:
4150
4151 if (vdcp->ctimeout_reached) {
4152 /*
4153 * The connection timeout had been reached so
4154 * pending requests have been cancelled. Now
4155 * that the connection is back we can reset
4156 * the timeout.
4157 */
4158 ASSERT(vdcp->local_dring_backup == NULL);
4159 ASSERT(tmid != 0);
4160 tmid = 0;
4161 vdcp->ctimeout_reached = B_FALSE;
4162 vdcp->state = VDC_STATE_RUNNING;
4163 DMSG(vdcp, 0, "[%d] connection to service "
4164 "domain is up", vdcp->instance);
4165 break;
4166 }
4167
4168 mutex_exit(&vdcp->lock);
4169 if (tmid != 0) {
4170 (void) untimeout(tmid);
4171 tmid = 0;
4172 }
4173 status = vdc_resubmit_backup_dring(vdcp);
4174 mutex_enter(&vdcp->lock);
4175
4176 if (status)
4177 vdcp->state = VDC_STATE_RESETTING;
4178 else
4179 vdcp->state = VDC_STATE_RUNNING;
4180
4181 break;
4182
4183 /* enter running state */
4184 case VDC_STATE_RUNNING:
4185 /*
4186 * Signal anyone waiting for the connection
4187 * to come online.
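 * This includes requests blocked on running_cv as well as the
 * failfast and ownership threads, which both need to re-check
 * their state once a reset has completed.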
4188 */
4189 vdcp->hshake_cnt = 0;
4190 cv_broadcast(&vdcp->running_cv);
4191
4192 /* failfast has to be checked after a reset */
4193 cv_signal(&vdcp->failfast_cv);
4194
4195 /* ownership is lost during reset */
4196 if (vdcp->ownership & VDC_OWNERSHIP_WANTED)
4197 vdcp->ownership |= VDC_OWNERSHIP_RESET;
4198 cv_signal(&vdcp->ownership_cv);
4199
4200 mutex_exit(&vdcp->lock);
4201
4202 for (;;) {
4203 vio_msg_t msg;
4204 status = vdc_wait_for_response(vdcp, &msg);
4205 if (status) break;
4206
4207 DMSG(vdcp, 1, "[%d] new pkt(s) available\n",
4208 vdcp->instance);
4209 status = vdc_process_data_msg(vdcp, &msg);
4210 if (status) {
4211 DMSG(vdcp, 1, "[%d] process_data_msg "
4212 "returned err=%d\n", vdcp->instance,
4213 status);
4214 break;
4215 }
4216
4217 }
4218
4219 mutex_enter(&vdcp->lock);
4220
4221 vdcp->state = VDC_STATE_RESETTING;
4222 vdcp->self_reset = B_TRUE;
4223 break;
4224
4225 case VDC_STATE_RESETTING:
4226 /*
4227 * When we reach this state, we either come from the
4228 * VDC_STATE_RUNNING state and we can have pending
4229 * requests but no timeout is armed; or we come from
4230 * the VDC_STATE_INIT_WAITING, VDC_STATE_NEGOTIATE or
4231 * VDC_STATE_HANDLE_PENDING state and there is no pending
4232 * request or pending requests have already been copied
4233 * into the backup dring. So we can safely keep the
4234 * connection timeout armed while we are in this state.
4235 */
4236
4237 DMSG(vdcp, 0, "Initiating channel reset "
4238 "(pending = %d)\n", (int)vdcp->threads_pending);
4239
4240 if (vdcp->self_reset) {
4241 DMSG(vdcp, 0,
4242 "[%d] calling stop_ldc_connection.\n",
4243 vdcp->instance);
4244 status = vdc_stop_ldc_connection(vdcp);
4245 vdcp->self_reset = B_FALSE;
4246 }
4247
4248 /*
4249 * Wait for all threads currently waiting
4250 * for a free dring entry.
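 * They are woken up via membind_cv and dring_free_cv, and we keep
 * polling, with a short delay, until threads_pending drops to zero.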
4251 */
4252 while (vdcp->threads_pending) {
4253 cv_broadcast(&vdcp->membind_cv);
4254 cv_broadcast(&vdcp->dring_free_cv);
4255 mutex_exit(&vdcp->lock);
4256 /* give the waiters enough time to wake up */
4257 delay(vdc_hz_min_ldc_delay);
4258 mutex_enter(&vdcp->lock);
4259 }
4260
4261 ASSERT(vdcp->threads_pending == 0);
4262
4263 /* Sanity check that no thread is receiving */
4264 ASSERT(vdcp->read_state != VDC_READ_WAITING);
4265
4266 vdcp->read_state = VDC_READ_IDLE;
4267
4268 vdc_backup_local_dring(vdcp);
4269
4270 /* clean up the old d-ring */
4271 vdc_destroy_descriptor_ring(vdcp);
4272
4273 /* go and start again */
4274 vdcp->state = VDC_STATE_INIT;
4275
4276 break;
4277
4278 case VDC_STATE_DETACH:
4279 DMSG(vdcp, 0, "[%d] Reset thread exit cleanup ..\n",
4280 vdcp->instance);
4281
4282 /* cancel any pending timeout */
4283 mutex_exit(&vdcp->lock);
4284 if (tmid != 0) {
4285 (void) untimeout(tmid);
4286 tmid = 0;
4287 }
4288 mutex_enter(&vdcp->lock);
4289
4290 /*
4291 * Signal anyone waiting for the connection
4292 * to come online
4293 */
4294 cv_broadcast(&vdcp->running_cv);
4295
4296 while (vdcp->sync_op_pending) {
4297 cv_signal(&vdcp->sync_pending_cv);
4298 cv_signal(&vdcp->sync_blocked_cv);
4299 mutex_exit(&vdcp->lock);
4300 /* give the waiters enough time to wake up */
4301 delay(vdc_hz_min_ldc_delay);
4302 mutex_enter(&vdcp->lock);
4303 }
4304
4305 mutex_exit(&vdcp->lock);
4306
4307 DMSG(vdcp, 0, "[%d] Msg processing thread exiting ..\n",
4308 vdcp->instance);
4309 thread_exit();
4310 break;
4311 }
4312 }
4313 }
4314
4315
4316 /*
4317 * Function:
4318 * vdc_process_data_msg()
4319 *
4320 * Description:
4321 * This function is called by the message processing thread each time
4322 * a message with a msgtype of VIO_TYPE_DATA is received. It will either
4323 * be an ACK or NACK from vds[1], which vdc handles as follows.
4324 * ACK - wake up the waiting thread
4325 * NACK - resend any messages necessary
4326 *
4327 * [1] Although the message format allows it, vds should not send a
4328 * VIO_SUBTYPE_INFO message to vdc asking it to read data; if for
4329 * some bizarre reason it does, vdc will reset the connection.
4330 *
4331 * Arguments:
4332 * vdc - soft state pointer for this instance of the device driver.
4333 * msg - the LDC message sent by vds
4334 *
4335 * Return Code:
4336 * 0 - Success.
4337 * > 0 - error value returned by LDC
4338 */
4339 static int
4340 vdc_process_data_msg(vdc_t *vdcp, vio_msg_t *msg)
4341 {
4342 int status = 0;
4343 vio_dring_msg_t *dring_msg;
4344 vdc_local_desc_t *ldep = NULL;
4345 int start, end;
4346 int idx;
4347 int op;
4348
4349 dring_msg = (vio_dring_msg_t *)msg;
4350
4351 ASSERT(msg->tag.vio_msgtype == VIO_TYPE_DATA);
4352 ASSERT(vdcp != NULL);
4353
4354 mutex_enter(&vdcp->lock);
4355
4356 /*
4357 * Check to see if the message has bogus data
4358 */
4359 idx = start = dring_msg->start_idx;
4360 end = dring_msg->end_idx;
4361 if ((start >= vdcp->dring_len) ||
4362 (end >= vdcp->dring_len) || (end < -1)) {
4363 /*
4364 * Update the I/O statistics to indicate that an error occurred.
4365 * No need to update the wait/run queues as no specific read or
4366 * write request is being completed in response to this 'msg'.
4367 */
4368 VD_UPDATE_ERR_STATS(vdcp, vd_softerrs);
4369 DMSG(vdcp, 0, "[%d] Bogus ACK data : start %d, end %d\n",
4370 vdcp->instance, start, end);
4371 mutex_exit(&vdcp->lock);
4372 return (EINVAL);
4373 }
4374
4375 /*
4376 * Verify that the sequence number is what vdc expects.
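 * vdc_verify_seq_num() tells us whether to process the message
 * (VDC_SEQ_NUM_TODO), silently skip it (VDC_SEQ_NUM_SKIP) or give
 * up because the sequence numbers are out of sync
 * (VDC_SEQ_NUM_INVALID).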
4377 */
4378 switch (vdc_verify_seq_num(vdcp, dring_msg)) {
4379 case VDC_SEQ_NUM_TODO:
4380 break; /* keep processing this message */
4381 case VDC_SEQ_NUM_SKIP:
4382 mutex_exit(&vdcp->lock);
4383 return (0);
4384 case VDC_SEQ_NUM_INVALID:
4385 /*
4386 * Update the I/O statistics to indicate that an error occurred.
4387 * No need to update the wait/run queues as no specific read or
4388 * write request is being completed in response to this 'msg'.
4389 */
4390 VD_UPDATE_ERR_STATS(vdcp, vd_softerrs);
4391 DMSG(vdcp, 0, "[%d] invalid seqno\n", vdcp->instance);
4392 mutex_exit(&vdcp->lock);
4393 return (ENXIO);
4394 }
4395
4396 if (msg->tag.vio_subtype == VIO_SUBTYPE_NACK) {
4397 /*
4398 * Update the I/O statistics to indicate that an error occurred.
4399 *
4400 * We need to update the run queue if a read or write request
4401 * is being NACKed - otherwise there will appear to be an
4402 * indefinite outstanding request and statistics reported by
4403 * iostat(1M) will be incorrect. The transaction will be
4404 * resubmitted from the backup DRing following the reset
4405 * and the wait/run queues will be entered again.
4406 */
4407 ldep = &vdcp->local_dring[idx];
4408 op = ldep->operation;
4409 if ((op == VD_OP_BREAD) || (op == VD_OP_BWRITE)) {
4410 DTRACE_IO1(done, buf_t *, ldep->cb_arg);
4411 VD_KSTAT_RUNQ_EXIT(vdcp);
4412 }
4413 VD_UPDATE_ERR_STATS(vdcp, vd_softerrs);
4414 VDC_DUMP_DRING_MSG(dring_msg);
4415 DMSG(vdcp, 0, "[%d] DATA NACK\n", vdcp->instance);
4416 mutex_exit(&vdcp->lock);
4417 return (EIO);
4418
4419 } else if (msg->tag.vio_subtype == VIO_SUBTYPE_INFO) {
4420 /*
4421 * Update the I/O statistics to indicate that an error occurred.
4422 * No need to update the wait/run queues as no specific read or
4423 * write request is being completed in response to this 'msg'.
4424 */
4425 VD_UPDATE_ERR_STATS(vdcp, vd_protoerrs);
4426 mutex_exit(&vdcp->lock);
4427 return (EPROTO);
4428 }
4429
4430 DMSG(vdcp, 1, ": start %d end %d\n", start, end);
4431 ASSERT(start == end);
4432
4433 ldep = &vdcp->local_dring[idx];
4434
4435 DMSG(vdcp, 1, ": state 0x%x - cb_type 0x%x\n",
4436 ldep->dep->hdr.dstate, ldep->cb_type);
4437
4438 if (ldep->dep->hdr.dstate == VIO_DESC_DONE) {
4439 struct buf *bufp;
4440
4441 switch (ldep->cb_type) {
4442 case CB_SYNC:
4443 ASSERT(vdcp->sync_op_pending);
4444
4445 status = vdc_depopulate_descriptor(vdcp, idx);
4446 vdcp->sync_op_status = status;
4447 vdcp->sync_op_pending = B_FALSE;
4448 cv_signal(&vdcp->sync_pending_cv);
4449 break;
4450
4451 case CB_STRATEGY:
4452 bufp = ldep->cb_arg;
4453 ASSERT(bufp != NULL);
4454 bufp->b_resid =
4455 bufp->b_bcount - ldep->dep->payload.nbytes;
4456 status = ldep->dep->payload.status; /* Future:ntoh */
4457 if (status != 0) {
4458 DMSG(vdcp, 1, "strategy status=%d\n", status);
4459 VD_UPDATE_ERR_STATS(vdcp, vd_softerrs);
4460 bioerror(bufp, status);
4461 }
4462
4463 (void) vdc_depopulate_descriptor(vdcp, idx);
4464
4465 DMSG(vdcp, 1,
4466 "strategy complete req=%ld bytes resp=%ld bytes\n",
4467 bufp->b_bcount, ldep->dep->payload.nbytes);
4468
4469 if (status != 0 && vdcp->failfast_interval != 0) {
4470 /*
4471 * The I/O has failed and failfast is enabled.
4472 * We need the failfast thread to check if the
4473 * failure is due to a reservation conflict.
4474 */
4475 (void) vdc_failfast_io_queue(vdcp, bufp);
4476 } else {
4477 if (status == 0) {
4478 op = (bufp->b_flags & B_READ) ?
4479 VD_OP_BREAD : VD_OP_BWRITE;
4480 VD_UPDATE_IO_STATS(vdcp, op,
4481 ldep->dep->payload.nbytes);
4482 }
4483 VD_KSTAT_RUNQ_EXIT(vdcp);
4484 DTRACE_IO1(done, buf_t *, bufp);
4485 biodone(bufp);
4486 }
4487 break;
4488
4489 default:
4490 ASSERT(0);
4491 }
4492 }
4493
4494 /* let the arrival signal propagate */
4495 mutex_exit(&vdcp->lock);
4496
4497 /* probe gives the count of how many entries were processed */
4498 DTRACE_PROBE2(processed, int, 1, vdc_t *, vdcp);
4499
4500 return (0);
4501 }
4502
4503
4504 /*
4505 * Function:
4506 * vdc_handle_ver_msg()
4507 *
4508 * Description:
4509 * Handle a VIO_VER_INFO message received from the vDisk server
4510 * during version negotiation.
4511 *
4512 * Arguments:
4513 * vdc - soft state pointer for this instance of the device driver.
4514 * ver_msg - LDC message sent by vDisk server
4515 *
4516 * Return Code:
4517 * 0 - Success
4518 */
4519 static int
4520 vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg)
4521 {
4522 int status = 0;
4523
4524 ASSERT(vdc != NULL);
4525 ASSERT(mutex_owned(&vdc->lock));
4526
4527 if (ver_msg->tag.vio_subtype_env != VIO_VER_INFO) {
4528 return (EPROTO);
4529 }
4530
4531 if (ver_msg->dev_class != VDEV_DISK_SERVER) {
4532 return (EINVAL);
4533 }
4534
4535 switch (ver_msg->tag.vio_subtype) {
4536 case VIO_SUBTYPE_ACK:
4537 /*
4538 * We check to see if the version returned is indeed supported
4539 * (the server may have also adjusted the minor number downwards
4540 * and if so 'ver_msg' will contain the actual version agreed).
4541 */
4542 if (vdc_is_supported_version(ver_msg)) {
4543 vdc->ver.major = ver_msg->ver_major;
4544 vdc->ver.minor = ver_msg->ver_minor;
4545 ASSERT(vdc->ver.major > 0);
4546 } else {
4547 status = EPROTO;
4548 }
4549 break;
4550
4551 case VIO_SUBTYPE_NACK:
4552 /*
4553 * call vdc_is_supported_version() which will return the next
4554 * supported version (if any) in 'ver_msg'
4555 */
4556 (void) vdc_is_supported_version(ver_msg);
4557 if (ver_msg->ver_major > 0) {
4558 size_t len = sizeof (*ver_msg);
4559
4560 ASSERT(vdc->ver.major > 0);
4561
4562 /* reset the necessary fields and resend */
4563 ver_msg->tag.vio_subtype = VIO_SUBTYPE_INFO;
4564 ver_msg->dev_class = VDEV_DISK;
4565
4566 status = vdc_send(vdc, (caddr_t)ver_msg, &len);
4567 DMSG(vdc, 0, "[%d] Resend VER info (LDC status = %d)\n",
4568 vdc->instance, status);
4569 if (len != sizeof (*ver_msg))
4570 status = EBADMSG;
4571 } else {
4572 DMSG(vdc, 0, "[%d] No common version with vDisk server",
4573 vdc->instance);
4574 status = ENOTSUP;
4575 }
4576
4577 break;
4578 case VIO_SUBTYPE_INFO:
4579 /*
4580 * Handle the case where vds starts the handshake
4581 * (for now only vdc is the instigator)
4582 */
4583 status = ENOTSUP;
4584 break;
4585
4586 default:
4587 status = EINVAL;
4588 break;
4589 }
4590
4591 return (status);
4592 }
4593
4594 /*
4595 * Function:
4596 * vdc_handle_attr_msg()
4597 *
4598 * Description:
4599 * Handle a VIO_ATTR_INFO message received from the vDisk server
4600 * during attribute negotiation.
4601 *
4602 * Arguments:
4603 * vdc - soft state pointer for this instance of the device driver.
4604 * attr_msg - LDC message sent by vDisk server
4605 *
4606 * Return Code:
4607 * 0 - Success
4608 */
4609 static int
4610 vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg)
4611 {
4612 int status = 0;
4613
4614 ASSERT(vdc != NULL);
4615 ASSERT(mutex_owned(&vdc->lock));
4616
4617 if (attr_msg->tag.vio_subtype_env != VIO_ATTR_INFO) {
4618 return (EPROTO);
4619 }
4620
4621 switch (attr_msg->tag.vio_subtype) {
4622 case VIO_SUBTYPE_ACK:
4623 /*
4624 * We now verify the attributes sent by vds.
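 * A zero disk size or transfer size, a disk size that changed
 * across a reset, or otherwise inconsistent attributes all abort
 * the handshake with EINVAL.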
4621 */
4622 if (attr_msg->vdisk_size == 0) {
4623 DMSG(vdc, 0, "[%d] Invalid disk size from vds",
4624 vdc->instance);
4625 status = EINVAL;
4626 break;
4627 }
4628
4629 if (attr_msg->max_xfer_sz == 0) {
4630 DMSG(vdc, 0, "[%d] Invalid transfer size from vds",
4631 vdc->instance);
4632 status = EINVAL;
4633 break;
4634 }
4635
4636 if (attr_msg->vdisk_size == VD_SIZE_UNKNOWN) {
4637 DMSG(vdc, 0, "[%d] Unknown disk size from vds",
4638 vdc->instance);
4639 attr_msg->vdisk_size = 0;
4640 }
4641
4642 /*
4643 * If the disk size is already set, check that it hasn't changed.
4644 */
4645 if ((vdc->vdisk_size != 0) && (attr_msg->vdisk_size != 0) &&
4646 (vdc->vdisk_size != attr_msg->vdisk_size)) {
4647 DMSG(vdc, 0, "[%d] Different disk size from vds "
4648 "(old=0x%lx - new=0x%lx)", vdc->instance,
4649 vdc->vdisk_size, attr_msg->vdisk_size);
4650 status = EINVAL;
4651 break;
4652 }
4653
4654 vdc->vdisk_size = attr_msg->vdisk_size;
4655 vdc->vdisk_type = attr_msg->vdisk_type;
4656 vdc->operations = attr_msg->operations;
4657 if (vio_ver_is_supported(vdc->ver, 1, 1))
4658 vdc->vdisk_media = attr_msg->vdisk_media;
4659 else
4660 vdc->vdisk_media = 0;
4661
4662 DMSG(vdc, 0, "[%d] max_xfer_sz: sent %lx acked %lx\n",
4663 vdc->instance, vdc->max_xfer_sz, attr_msg->max_xfer_sz);
4664 DMSG(vdc, 0, "[%d] vdisk_block_size: sent %lx acked %x\n",
4665 vdc->instance, vdc->block_size,
4666 attr_msg->vdisk_block_size);
4667
4668 /*
4669 * We don't know at compile time what the vDisk server will
4670 * think are good values but we apply a large (arbitrary)
4671 * upper bound to prevent memory exhaustion in vdc if it was
4672 * allocating a DRing based on huge values sent by the server.
4673 * We probably will never exceed this except if the message
4674 * was garbage.
4675 */
4676 if ((attr_msg->max_xfer_sz * attr_msg->vdisk_block_size) <=
4677 (PAGESIZE * DEV_BSIZE)) {
4678 vdc->max_xfer_sz = attr_msg->max_xfer_sz;
4679 vdc->block_size = attr_msg->vdisk_block_size;
4680 } else {
4681 DMSG(vdc, 0, "[%d] vds block transfer size too big;"
4682 " using max supported by vdc", vdc->instance);
4683 }
4684
4685 if ((attr_msg->xfer_mode != VIO_DRING_MODE_V1_0) ||
4686 (attr_msg->vdisk_size > INT64_MAX) ||
4687 (attr_msg->operations == 0) ||
4688 (attr_msg->vdisk_type > VD_DISK_TYPE_DISK)) {
4689 DMSG(vdc, 0, "[%d] Invalid attributes from vds",
4690 vdc->instance);
4691 status = EINVAL;
4692 break;
4693 }
4694
4695 /*
4696 * Now that we have received all attributes we can create a
4697 * fake geometry for the disk.
4698 */
4699 vdc_create_fake_geometry(vdc);
4700 break;
4701
4702 case VIO_SUBTYPE_NACK:
4703 /*
4704 * vds could not handle the attributes we sent so we
4705 * stop negotiating.
4706 */
4707 status = EPROTO;
4708 break;
4709
4710 case VIO_SUBTYPE_INFO:
4711 /*
4712 * Handle the case where vds starts the handshake
4713 * (for now, vdc is the only supported instigator)
4714 */
4715 status = ENOTSUP;
4716 break;
4717
4718 default:
4719 status = ENOTSUP;
4720 break;
4721 }
4722
4723 return (status);
4724 }
4725
4726 /*
4727 * Function:
4728 * vdc_handle_dring_reg_msg()
4729 *
4730 * Description:
4731 * Handle a VIO_DRING_REG message received from the vDisk server
4732 * during descriptor ring registration.
4733 *
4734 * Arguments:
4735 * vdc - soft state pointer for this instance of the driver.
4734 * dring_msg - LDC message sent by vDisk server
4735 *
4736 * Return Code:
4737 * 0 - Success
4738 */
4739 static int
4740 vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *dring_msg)
4741 {
4742 int status = 0;
4743
4744 ASSERT(vdc != NULL);
4745 ASSERT(mutex_owned(&vdc->lock));
4746
4747 if (dring_msg->tag.vio_subtype_env != VIO_DRING_REG) {
4748 return (EPROTO);
4749 }
4750
4751 switch (dring_msg->tag.vio_subtype) {
4752 case VIO_SUBTYPE_ACK:
4753 /* save the received dring_ident */
4754 vdc->dring_ident = dring_msg->dring_ident;
4755 DMSG(vdc, 0, "[%d] Received dring ident=0x%lx\n",
4756 vdc->instance, vdc->dring_ident);
4757 break;
4758
4759 case VIO_SUBTYPE_NACK:
4760 /*
4761 * vds could not handle the DRing info we sent so we
4762 * stop negotiating.
4763 */
4764 DMSG(vdc, 0, "[%d] server could not register DRing\n",
4765 vdc->instance);
4766 status = EPROTO;
4767 break;
4768
4769 case VIO_SUBTYPE_INFO:
4770 /*
4771 * Handle the case where vds starts the handshake
4772 * (for now only vdc is the instigator)
4773 */
4774 status = ENOTSUP;
4775 break;
4776 default:
4777 status = ENOTSUP;
4778 }
4779
4780 return (status);
4781 }
4782
4783 /*
4784 * Function:
4785 * vdc_verify_seq_num()
4786 *
4787 * Description:
4788 * This function verifies that the sequence number sent back by the vDisk
4789 * server with the latest message is what is expected (i.e. it is greater
4790 * than the last seq num sent by the vDisk server and less than or equal
4791 * to the last seq num generated by vdc).
4792 *
4793 * It then checks the request ID to see if any requests need processing
4794 * in the DRing.
4795 *
4796 * Arguments:
4797 * vdc - soft state pointer for this instance of the driver.
4798 * dring_msg - pointer to the LDC message sent by vds
4799 *
4800 * Return Code:
4801 * VDC_SEQ_NUM_TODO - Message needs to be processed
4802 * VDC_SEQ_NUM_SKIP - Message has already been processed
4803 * VDC_SEQ_NUM_INVALID - The seq numbers are so far out of sync that
4804 * vdc cannot deal with them
4805 */
4806 static int
4807 vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg)
4808 {
4809 ASSERT(vdc != NULL);
4810 ASSERT(dring_msg != NULL);
4811 ASSERT(mutex_owned(&vdc->lock));
4812
4813 /*
4814 * Check to see if the messages were responded to in the correct
4815 * order by vds.
4816 */
4817 if ((dring_msg->seq_num <= vdc->seq_num_reply) ||
4818 (dring_msg->seq_num > vdc->seq_num)) {
4819 DMSG(vdc, 0, "?[%d] Bogus sequence_number %lu: "
4820 "%lu > expected <= %lu (last proc req %lu sent %lu)\n",
4821 vdc->instance, dring_msg->seq_num,
4822 vdc->seq_num_reply, vdc->seq_num,
4823 vdc->req_id_proc, vdc->req_id);
4824 return (VDC_SEQ_NUM_INVALID);
4825 }
4826 vdc->seq_num_reply = dring_msg->seq_num;
4827
4828 if (vdc->req_id_proc < vdc->req_id)
4829 return (VDC_SEQ_NUM_TODO);
4830 else
4831 return (VDC_SEQ_NUM_SKIP);
4832 }
4833
4834
4835 /*
4836 * Function:
4837 * vdc_is_supported_version()
4838 *
4839 * Description:
4840 * This routine checks if the major/minor version numbers specified in
4841 * 'ver_msg' are supported.
If not, it finds the next version that is
4842 * in the supported version list 'vdc_version[]' and sets the fields in
4843 * 'ver_msg' to those values.
4844 *
4845 * Arguments:
4846 * ver_msg - LDC message sent by vDisk server
4847 *
4848 * Return Code:
4849 * B_TRUE - Success
4850 * B_FALSE - Version not supported
4851 */
4852 static boolean_t
4853 vdc_is_supported_version(vio_ver_msg_t *ver_msg)
4854 {
4855 int vdc_num_versions = sizeof (vdc_version) / sizeof (vdc_version[0]);
4856
4857 for (int i = 0; i < vdc_num_versions; i++) {
4858 ASSERT(vdc_version[i].major > 0);
4859 ASSERT((i == 0) ||
4860 (vdc_version[i].major < vdc_version[i-1].major));
4861
4862 /*
4863 * If the major versions match, adjust the minor version, if
4864 * necessary, down to the highest value supported by this
4865 * client. The server should support all minor versions lower
4866 * than the value it sent.
4867 */
4868 if (ver_msg->ver_major == vdc_version[i].major) {
4869 if (ver_msg->ver_minor > vdc_version[i].minor) {
4870 DMSGX(0,
4871 "Adjusting minor version from %u to %u",
4872 ver_msg->ver_minor, vdc_version[i].minor);
4873 ver_msg->ver_minor = vdc_version[i].minor;
4874 }
4875 return (B_TRUE);
4876 }
4877
4878 /*
4879 * If the message contains a higher major version number, set
4880 * the message's major/minor versions to the current values
4881 * and return false, so this message will get resent with
4882 * these values, and the server will potentially try again
4883 * with the same or a lower version.
4884 */
4885 if (ver_msg->ver_major > vdc_version[i].major) {
4886 ver_msg->ver_major = vdc_version[i].major;
4887 ver_msg->ver_minor = vdc_version[i].minor;
4888 DMSGX(0, "Suggesting major/minor (0x%x/0x%x)\n",
4889 ver_msg->ver_major, ver_msg->ver_minor);
4890
4891 return (B_FALSE);
4892 }
4893
4894 /*
4895 * Otherwise, the message's major version is less than the
4896 * current major version, so continue the loop to the next
4897 * (lower) supported version.
4898 */
4899 }
4900
4901 /*
4902 * No common version was found; "ground" the version pair in the
4903 * message to terminate negotiation.
4904 */
4905 ver_msg->ver_major = 0;
4906 ver_msg->ver_minor = 0;
4907
4908 return (B_FALSE);
4909 }
4910 /* -------------------------------------------------------------------------- */
4911
4912 /*
4913 * DKIO(7) support
4914 */
4915
4916 typedef struct vdc_dk_arg {
4917 struct dk_callback dkc;
4918 int mode;
4919 dev_t dev;
4920 vdc_t *vdc;
4921 } vdc_dk_arg_t;
4922
4923 /*
4924 * Function:
4925 * vdc_dkio_flush_cb()
4926 *
4927 * Description:
4928 * This routine is a callback for DKIOCFLUSHWRITECACHE which can be called
4929 * by kernel code.
4930 *
4931 * Arguments:
4932 * arg - a pointer to a vdc_dk_arg_t structure.
4933 */
4934 void
4935 vdc_dkio_flush_cb(void *arg)
4936 {
4937 struct vdc_dk_arg *dk_arg = (struct vdc_dk_arg *)arg;
4938 struct dk_callback *dkc = NULL;
4939 vdc_t *vdc = NULL;
4940 int rv;
4941
4942 if (dk_arg == NULL) {
4943 cmn_err(CE_NOTE, "?[Unk] DKIOCFLUSHWRITECACHE arg is NULL\n");
4944 return;
4945 }
4946 dkc = &dk_arg->dkc;
4947 vdc = dk_arg->vdc;
4948 ASSERT(vdc != NULL);
4949
4950 rv = vdc_do_sync_op(vdc, VD_OP_FLUSH, NULL, 0,
4951 VDCPART(dk_arg->dev), 0, CB_SYNC, 0, VIO_both_dir, B_TRUE);
4952 if (rv != 0) {
4953 DMSG(vdc, 0, "[%d] DKIOCFLUSHWRITECACHE failed %d : model %x\n",
4954 vdc->instance, rv,
4955 ddi_model_convert_from(dk_arg->mode & FMODELS));
4956 }
4957
4958 /*
4959 * Trigger the callback to notify the caller that the ioctl call has
4960 * been completed.
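 * The callback is only invoked for kernel callers (FKIOCTL) that
 * actually registered a dk_callback with a valid dkc_callback
 * routine.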
4961 */ 4962 if ((dk_arg->mode & FKIOCTL) && 4963 (dkc != NULL) && 4964 (dkc->dkc_callback != NULL)) { 4965 ASSERT(dkc->dkc_cookie != NULL); 4966 (*dkc->dkc_callback)(dkc->dkc_cookie, rv); 4967 } 4968 4969 /* Indicate that one less DKIO write flush is outstanding */ 4970 mutex_enter(&vdc->lock); 4971 vdc->dkio_flush_pending--; 4972 ASSERT(vdc->dkio_flush_pending >= 0); 4973 mutex_exit(&vdc->lock); 4974 4975 /* free the mem that was allocated when the callback was dispatched */ 4976 kmem_free(arg, sizeof (vdc_dk_arg_t)); 4977 } 4978 4979 /* 4980 * Function: 4981 * vdc_dkio_gapart() 4982 * 4983 * Description: 4984 * This function implements the DKIOCGAPART ioctl. 4985 * 4986 * Arguments: 4987 * vdc - soft state pointer 4988 * arg - a pointer to a dk_map[NDKMAP] or dk_map32[NDKMAP] structure 4989 * flag - ioctl flags 4990 */ 4991 static int 4992 vdc_dkio_gapart(vdc_t *vdc, caddr_t arg, int flag) 4993 { 4994 struct dk_geom *geom; 4995 struct vtoc *vtoc; 4996 union { 4997 struct dk_map map[NDKMAP]; 4998 struct dk_map32 map32[NDKMAP]; 4999 } data; 5000 int i, rv, size; 5001 5002 mutex_enter(&vdc->lock); 5003 5004 if ((rv = vdc_validate_geometry(vdc)) != 0) { 5005 mutex_exit(&vdc->lock); 5006 return (rv); 5007 } 5008 5009 vtoc = vdc->vtoc; 5010 geom = vdc->geom; 5011 5012 if (ddi_model_convert_from(flag & FMODELS) == DDI_MODEL_ILP32) { 5013 5014 for (i = 0; i < vtoc->v_nparts; i++) { 5015 data.map32[i].dkl_cylno = vtoc->v_part[i].p_start / 5016 (geom->dkg_nhead * geom->dkg_nsect); 5017 data.map32[i].dkl_nblk = vtoc->v_part[i].p_size; 5018 } 5019 size = NDKMAP * sizeof (struct dk_map32); 5020 5021 } else { 5022 5023 for (i = 0; i < vtoc->v_nparts; i++) { 5024 data.map[i].dkl_cylno = vtoc->v_part[i].p_start / 5025 (geom->dkg_nhead * geom->dkg_nsect); 5026 data.map[i].dkl_nblk = vtoc->v_part[i].p_size; 5027 } 5028 size = NDKMAP * sizeof (struct dk_map); 5029 5030 } 5031 5032 mutex_exit(&vdc->lock); 5033 5034 if (ddi_copyout(&data, arg, size, flag) != 0) 5035 return (EFAULT); 5036 5037 return (0); 5038 } 5039 5040 /* 5041 * Function: 5042 * vdc_dkio_partition() 5043 * 5044 * Description: 5045 * This function implements the DKIOCPARTITION ioctl. 5046 * 5047 * Arguments: 5048 * vdc - soft state pointer 5049 * arg - a pointer to a struct partition64 structure 5050 * flag - ioctl flags 5051 */ 5052 static int 5053 vdc_dkio_partition(vdc_t *vdc, caddr_t arg, int flag) 5054 { 5055 struct partition64 p64; 5056 efi_gpt_t *gpt; 5057 efi_gpe_t *gpe; 5058 vd_efi_dev_t edev; 5059 uint_t partno; 5060 int rv; 5061 5062 if (ddi_copyin(arg, &p64, sizeof (struct partition64), flag)) { 5063 return (EFAULT); 5064 } 5065 5066 VD_EFI_DEV_SET(edev, vdc, vd_process_efi_ioctl); 5067 5068 if ((rv = vd_efi_alloc_and_read(&edev, &gpt, &gpe)) != 0) { 5069 return (rv); 5070 } 5071 5072 partno = p64.p_partno; 5073 5074 if (partno >= gpt->efi_gpt_NumberOfPartitionEntries) { 5075 vd_efi_free(&edev, gpt, gpe); 5076 return (ESRCH); 5077 } 5078 5079 bcopy(&gpe[partno].efi_gpe_PartitionTypeGUID, &p64.p_type, 5080 sizeof (struct uuid)); 5081 p64.p_start = gpe[partno].efi_gpe_StartingLBA; 5082 p64.p_size = gpe[partno].efi_gpe_EndingLBA - p64.p_start + 1; 5083 5084 if (ddi_copyout(&p64, arg, sizeof (struct partition64), flag)) { 5085 vd_efi_free(&edev, gpt, gpe); 5086 return (EFAULT); 5087 } 5088 5089 vd_efi_free(&edev, gpt, gpe); 5090 return (0); 5091 } 5092 5093 /* 5094 * Function: 5095 * vdc_dioctl_rwcmd() 5096 * 5097 * Description: 5098 * This function implements the DIOCTL_RWCMD ioctl. 
This ioctl is used 5099 * for DKC_DIRECT disks to read or write at an absolute disk offset. 5100 * 5101 * Arguments: 5102 * dev - device 5103 * arg - a pointer to a dadkio_rwcmd or dadkio_rwcmd32 structure 5104 * flag - ioctl flags 5105 */ 5106 static int 5107 vdc_dioctl_rwcmd(dev_t dev, caddr_t arg, int flag) 5108 { 5109 struct dadkio_rwcmd32 rwcmd32; 5110 struct dadkio_rwcmd rwcmd; 5111 struct iovec aiov; 5112 struct uio auio; 5113 int rw, status; 5114 struct buf *buf; 5115 5116 if (ddi_model_convert_from(flag & FMODELS) == DDI_MODEL_ILP32) { 5117 if (ddi_copyin((caddr_t)arg, (caddr_t)&rwcmd32, 5118 sizeof (struct dadkio_rwcmd32), flag)) { 5119 return (EFAULT); 5120 } 5121 rwcmd.cmd = rwcmd32.cmd; 5122 rwcmd.flags = rwcmd32.flags; 5123 rwcmd.blkaddr = (daddr_t)rwcmd32.blkaddr; 5124 rwcmd.buflen = rwcmd32.buflen; 5125 rwcmd.bufaddr = (caddr_t)(uintptr_t)rwcmd32.bufaddr; 5126 } else { 5127 if (ddi_copyin((caddr_t)arg, (caddr_t)&rwcmd, 5128 sizeof (struct dadkio_rwcmd), flag)) { 5129 return (EFAULT); 5130 } 5131 } 5132 5133 switch (rwcmd.cmd) { 5134 case DADKIO_RWCMD_READ: 5135 rw = B_READ; 5136 break; 5137 case DADKIO_RWCMD_WRITE: 5138 rw = B_WRITE; 5139 break; 5140 default: 5141 return (EINVAL); 5142 } 5143 5144 bzero((caddr_t)&aiov, sizeof (struct iovec)); 5145 aiov.iov_base = rwcmd.bufaddr; 5146 aiov.iov_len = rwcmd.buflen; 5147 5148 bzero((caddr_t)&auio, sizeof (struct uio)); 5149 auio.uio_iov = &aiov; 5150 auio.uio_iovcnt = 1; 5151 auio.uio_loffset = rwcmd.blkaddr * DEV_BSIZE; 5152 auio.uio_resid = rwcmd.buflen; 5153 auio.uio_segflg = flag & FKIOCTL ? UIO_SYSSPACE : UIO_USERSPACE; 5154 5155 buf = kmem_alloc(sizeof (buf_t), KM_SLEEP); 5156 bioinit(buf); 5157 /* 5158 * We use the private field of buf to specify that this is an 5159 * I/O using an absolute offset. 5160 */ 5161 buf->b_private = (void *)VD_SLICE_NONE; 5162 5163 status = physio(vdc_strategy, buf, dev, rw, vdc_min, &auio); 5164 5165 biofini(buf); 5166 kmem_free(buf, sizeof (buf_t)); 5167 5168 return (status); 5169 } 5170 5171 /* 5172 * Allocate a buffer for a VD_OP_SCSICMD operation. The size of the allocated 5173 * buffer is returned in alloc_len. 5174 */ 5175 static vd_scsi_t * 5176 vdc_scsi_alloc(int cdb_len, int sense_len, int datain_len, int dataout_len, 5177 int *alloc_len) 5178 { 5179 vd_scsi_t *vd_scsi; 5180 int vd_scsi_len = VD_SCSI_SIZE; 5181 5182 vd_scsi_len += P2ROUNDUP(cdb_len, sizeof (uint64_t)); 5183 vd_scsi_len += P2ROUNDUP(sense_len, sizeof (uint64_t)); 5184 vd_scsi_len += P2ROUNDUP(datain_len, sizeof (uint64_t)); 5185 vd_scsi_len += P2ROUNDUP(dataout_len, sizeof (uint64_t)); 5186 5187 ASSERT(vd_scsi_len % sizeof (uint64_t) == 0); 5188 5189 vd_scsi = kmem_zalloc(vd_scsi_len, KM_SLEEP); 5190 5191 vd_scsi->cdb_len = cdb_len; 5192 vd_scsi->sense_len = sense_len; 5193 vd_scsi->datain_len = datain_len; 5194 vd_scsi->dataout_len = dataout_len; 5195 5196 *alloc_len = vd_scsi_len; 5197 5198 return (vd_scsi); 5199 } 5200 5201 /* 5202 * Convert the status of a SCSI command to a Solaris return code. 5203 * 5204 * Arguments: 5205 * vd_scsi - The SCSI operation buffer. 5206 * log_error - indicate if an error message should be logged. 5207 * 5208 * Note that our SCSI error messages are rather primitive for the moment 5209 * and could be improved by decoding some data like the SCSI command and 5210 * the sense key. 5211 * 5212 * Return value: 5213 * 0 - Status is good. 5214 * EACCES - Status reports a reservation conflict. 5215 * ENOTSUP - Status reports a check condition and sense key 5216 * reports an illegal request. 
5217 * EIO - Any other status.
5218 */
5219 static int
5220 vdc_scsi_status(vdc_t *vdc, vd_scsi_t *vd_scsi, boolean_t log_error)
5221 {
5222 int rv;
5223 char path_str[MAXPATHLEN];
5224 char panic_str[VDC_RESV_CONFLICT_FMT_LEN + MAXPATHLEN];
5225 union scsi_cdb *cdb;
5226 struct scsi_extended_sense *sense;
5227
5228 if (vd_scsi->cmd_status == STATUS_GOOD)
5229 /* no error */
5230 return (0);
5231
5232 /* when the tunable vdc_scsi_log_error is true we log all errors */
5233 if (vdc_scsi_log_error)
5234 log_error = B_TRUE;
5235
5236 if (log_error) {
5237 cmn_err(CE_WARN, "%s (vdc%d):\tError for Command: 0x%x\n",
5238 ddi_pathname(vdc->dip, path_str), vdc->instance,
5239 GETCMD(VD_SCSI_DATA_CDB(vd_scsi)));
5240 }
5241
5242 /* default returned value */
5243 rv = EIO;
5244
5245 switch (vd_scsi->cmd_status) {
5246
5247 case STATUS_CHECK:
5248 case STATUS_TERMINATED:
5249 if (log_error)
5250 cmn_err(CE_CONT, "\tCheck Condition Error\n");
5251
5252 /* check sense buffer */
5253 if (vd_scsi->sense_len == 0 ||
5254 vd_scsi->sense_status != STATUS_GOOD) {
5255 if (log_error)
5256 cmn_err(CE_CONT, "\tNo Sense Data Available\n");
5257 break;
5258 }
5259
5260 sense = VD_SCSI_DATA_SENSE(vd_scsi);
5261
5262 if (log_error) {
5263 cmn_err(CE_CONT, "\tSense Key: 0x%x\n"
5264 "\tASC: 0x%x, ASCQ: 0x%x\n",
5265 scsi_sense_key((uint8_t *)sense),
5266 scsi_sense_asc((uint8_t *)sense),
5267 scsi_sense_ascq((uint8_t *)sense));
5268 }
5269
5270 if (scsi_sense_key((uint8_t *)sense) == KEY_ILLEGAL_REQUEST)
5271 rv = ENOTSUP;
5272 break;
5273
5274 case STATUS_BUSY:
5275 if (log_error)
5276 cmn_err(CE_NOTE, "\tDevice Busy\n");
5277 break;
5278
5279 case STATUS_RESERVATION_CONFLICT:
5280 /*
5281 * If the command was PERSISTENT_RESERVATION_[IN|OUT] then
5282 * the reservation conflict could be due to various reasons
5283 * like incorrect keys, not being registered or not being
5284 * reserved. So, we should not panic in that case.
5285 */
5286 cdb = VD_SCSI_DATA_CDB(vd_scsi);
5287 if (vdc->failfast_interval != 0 &&
5288 cdb->scc_cmd != SCMD_PERSISTENT_RESERVE_IN &&
5289 cdb->scc_cmd != SCMD_PERSISTENT_RESERVE_OUT) {
5290 /* failfast is enabled so we have to panic */
5291 (void) snprintf(panic_str, sizeof (panic_str),
5292 VDC_RESV_CONFLICT_FMT_STR "%s",
5293 ddi_pathname(vdc->dip, path_str));
5294 panic(panic_str);
5295 }
5296 if (log_error)
5297 cmn_err(CE_NOTE, "\tReservation Conflict\n");
5298 rv = EACCES;
5299 break;
5300
5301 case STATUS_QFULL:
5302 if (log_error)
5303 cmn_err(CE_NOTE, "\tQueue Full\n");
5304 break;
5305
5306 case STATUS_MET:
5307 case STATUS_INTERMEDIATE:
5308 case STATUS_SCSI2:
5309 case STATUS_INTERMEDIATE_MET:
5310 case STATUS_ACA_ACTIVE:
5311 if (log_error)
5312 cmn_err(CE_CONT,
5313 "\tUnexpected SCSI status received: 0x%x\n",
5314 vd_scsi->cmd_status);
5315 break;
5316
5317 default:
5318 if (log_error)
5319 cmn_err(CE_CONT,
5320 "\tInvalid SCSI status received: 0x%x\n",
5321 vd_scsi->cmd_status);
5322 break;
5323 }
5324
5325 return (rv);
5326 }
5327
5328 /*
5329 * Implement the USCSICMD uscsi(7I) ioctl. This ioctl is converted to
5330 * a VD_OP_SCSICMD operation which is sent to the vdisk server. If a SCSI
5331 * reset is requested (i.e. a flag USCSI_RESET* is set) then the ioctl is
5332 * converted to a VD_OP_RESET operation.
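 * The general flow is: copy in the uscsi_cmd (32-bit or 64-bit
 * layout), build a vd_scsi_t describing the CDB, sense and data
 * buffers, submit it synchronously, then copy the status, sense
 * data and residuals back out to the caller.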
5333 */
5334 static int
5335 vdc_uscsi_cmd(vdc_t *vdc, caddr_t arg, int mode)
5336 {
5337 struct uscsi_cmd uscsi;
5338 struct uscsi_cmd32 uscsi32;
5339 vd_scsi_t *vd_scsi;
5340 int vd_scsi_len;
5341 union scsi_cdb *cdb;
5342 struct scsi_extended_sense *sense;
5343 char *datain, *dataout;
5344 size_t cdb_len, datain_len, dataout_len, sense_len;
5345 int rv;
5346
5347 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) {
5348 if (ddi_copyin(arg, &uscsi32, sizeof (struct uscsi_cmd32),
5349 mode) != 0)
5350 return (EFAULT);
5351 uscsi_cmd32touscsi_cmd((&uscsi32), (&uscsi));
5352 } else {
5353 if (ddi_copyin(arg, &uscsi, sizeof (struct uscsi_cmd),
5354 mode) != 0)
5355 return (EFAULT);
5356 }
5357
5358 /* a uscsi reset is converted to a VD_OP_RESET operation */
5359 if (uscsi.uscsi_flags & (USCSI_RESET | USCSI_RESET_LUN |
5360 USCSI_RESET_ALL)) {
5361 rv = vdc_do_sync_op(vdc, VD_OP_RESET, NULL, 0, 0, 0, CB_SYNC,
5362 (void *)(uint64_t)mode, VIO_both_dir, B_TRUE);
5363 return (rv);
5364 }
5365
5366 /* cdb buffer length */
5367 cdb_len = uscsi.uscsi_cdblen;
5368
5369 /* data in and out buffers length */
5370 if (uscsi.uscsi_flags & USCSI_READ) {
5371 datain_len = uscsi.uscsi_buflen;
5372 dataout_len = 0;
5373 } else {
5374 datain_len = 0;
5375 dataout_len = uscsi.uscsi_buflen;
5376 }
5377
5378 /* sense buffer length */
5379 if (uscsi.uscsi_flags & USCSI_RQENABLE)
5380 sense_len = uscsi.uscsi_rqlen;
5381 else
5382 sense_len = 0;
5383
5384 /* allocate buffer for the VD_OP_SCSICMD operation */
5385 vd_scsi = vdc_scsi_alloc(cdb_len, sense_len, datain_len, dataout_len,
5386 &vd_scsi_len);
5387
5388 /*
5389 * The documentation of USCSI_ISOLATE and USCSI_DIAGNOSE is very vague,
5390 * but basically they prevent a SCSI command from being retried in case
5391 * of an error.
5392 */ 5393 if ((uscsi.uscsi_flags & USCSI_ISOLATE) || 5394 (uscsi.uscsi_flags & USCSI_DIAGNOSE)) 5395 vd_scsi->options |= VD_SCSI_OPT_NORETRY; 5396 5397 /* set task attribute */ 5398 if (uscsi.uscsi_flags & USCSI_NOTAG) { 5399 vd_scsi->task_attribute = 0; 5400 } else { 5401 if (uscsi.uscsi_flags & USCSI_HEAD) 5402 vd_scsi->task_attribute = VD_SCSI_TASK_ACA; 5403 else if (uscsi.uscsi_flags & USCSI_HTAG) 5404 vd_scsi->task_attribute = VD_SCSI_TASK_HQUEUE; 5405 else if (uscsi.uscsi_flags & USCSI_OTAG) 5406 vd_scsi->task_attribute = VD_SCSI_TASK_ORDERED; 5407 else 5408 vd_scsi->task_attribute = 0; 5409 } 5410 5411 /* set timeout */ 5412 vd_scsi->timeout = uscsi.uscsi_timeout; 5413 5414 /* copy-in cdb data */ 5415 cdb = VD_SCSI_DATA_CDB(vd_scsi); 5416 if (ddi_copyin(uscsi.uscsi_cdb, cdb, cdb_len, mode) != 0) { 5417 rv = EFAULT; 5418 goto done; 5419 } 5420 5421 /* keep a pointer to the sense buffer */ 5422 sense = VD_SCSI_DATA_SENSE(vd_scsi); 5423 5424 /* keep a pointer to the data-in buffer */ 5425 datain = (char *)VD_SCSI_DATA_IN(vd_scsi); 5426 5427 /* copy-in request data to the data-out buffer */ 5428 dataout = (char *)VD_SCSI_DATA_OUT(vd_scsi); 5429 if (!(uscsi.uscsi_flags & USCSI_READ)) { 5430 if (ddi_copyin(uscsi.uscsi_bufaddr, dataout, dataout_len, 5431 mode)) { 5432 rv = EFAULT; 5433 goto done; 5434 } 5435 } 5436 5437 /* submit the request */ 5438 rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 5439 0, 0, CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir, B_FALSE); 5440 5441 if (rv != 0) 5442 goto done; 5443 5444 /* update scsi status */ 5445 uscsi.uscsi_status = vd_scsi->cmd_status; 5446 5447 /* update sense data */ 5448 if ((uscsi.uscsi_flags & USCSI_RQENABLE) && 5449 (uscsi.uscsi_status == STATUS_CHECK || 5450 uscsi.uscsi_status == STATUS_TERMINATED)) { 5451 5452 uscsi.uscsi_rqstatus = vd_scsi->sense_status; 5453 5454 if (uscsi.uscsi_rqstatus == STATUS_GOOD) { 5455 uscsi.uscsi_rqresid = uscsi.uscsi_rqlen - 5456 vd_scsi->sense_len; 5457 if (ddi_copyout(sense, uscsi.uscsi_rqbuf, 5458 vd_scsi->sense_len, mode) != 0) { 5459 rv = EFAULT; 5460 goto done; 5461 } 5462 } 5463 } 5464 5465 /* update request data */ 5466 if (uscsi.uscsi_status == STATUS_GOOD) { 5467 if (uscsi.uscsi_flags & USCSI_READ) { 5468 uscsi.uscsi_resid = uscsi.uscsi_buflen - 5469 vd_scsi->datain_len; 5470 if (ddi_copyout(datain, uscsi.uscsi_bufaddr, 5471 vd_scsi->datain_len, mode) != 0) { 5472 rv = EFAULT; 5473 goto done; 5474 } 5475 } else { 5476 uscsi.uscsi_resid = uscsi.uscsi_buflen - 5477 vd_scsi->dataout_len; 5478 } 5479 } 5480 5481 /* copy-out result */ 5482 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 5483 uscsi_cmdtouscsi_cmd32((&uscsi), (&uscsi32)); 5484 if (ddi_copyout(&uscsi32, arg, sizeof (struct uscsi_cmd32), 5485 mode) != 0) { 5486 rv = EFAULT; 5487 goto done; 5488 } 5489 } else { 5490 if (ddi_copyout(&uscsi, arg, sizeof (struct uscsi_cmd), 5491 mode) != 0) { 5492 rv = EFAULT; 5493 goto done; 5494 } 5495 } 5496 5497 /* get the return code from the SCSI command status */ 5498 rv = vdc_scsi_status(vdc, vd_scsi, 5499 !(uscsi.uscsi_flags & USCSI_SILENT)); 5500 5501 done: 5502 kmem_free(vd_scsi, vd_scsi_len); 5503 return (rv); 5504 } 5505 5506 /* 5507 * Create a VD_OP_SCSICMD buffer for a SCSI PERSISTENT IN command. 5508 * 5509 * Arguments: 5510 * cmd - SCSI PERSISTENT IN command 5511 * len - length of the SCSI input buffer 5512 * vd_scsi_len - return the length of the allocated buffer 5513 * 5514 * Returned Value: 5515 * a pointer to the allocated VD_OP_SCSICMD buffer. 
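 * The buffer is sized by vdc_scsi_alloc() for a CDB_GROUP1 CDB,
 * an extended sense buffer and a data-in area of 'len' bytes.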
5516 */ 5517 static vd_scsi_t * 5518 vdc_scsi_alloc_persistent_in(uchar_t cmd, int len, int *vd_scsi_len) 5519 { 5520 int cdb_len, sense_len, datain_len, dataout_len; 5521 vd_scsi_t *vd_scsi; 5522 union scsi_cdb *cdb; 5523 5524 cdb_len = CDB_GROUP1; 5525 sense_len = sizeof (struct scsi_extended_sense); 5526 datain_len = len; 5527 dataout_len = 0; 5528 5529 vd_scsi = vdc_scsi_alloc(cdb_len, sense_len, datain_len, dataout_len, 5530 vd_scsi_len); 5531 5532 cdb = VD_SCSI_DATA_CDB(vd_scsi); 5533 5534 /* set cdb */ 5535 cdb->scc_cmd = SCMD_PERSISTENT_RESERVE_IN; 5536 cdb->cdb_opaque[1] = cmd; 5537 FORMG1COUNT(cdb, datain_len); 5538 5539 vd_scsi->timeout = vdc_scsi_timeout; 5540 5541 return (vd_scsi); 5542 } 5543 5544 /* 5545 * Create a VD_OP_SCSICMD buffer for a SCSI PERSISTENT OUT command. 5546 * 5547 * Arguments: 5548 * cmd - SCSI PERSISTENT OUT command 5549 * len - length of the SCSI output buffer 5550 * vd_scsi_len - return the length of the allocated buffer 5551 * 5552 * Returned Code: 5553 * a pointer to the allocated VD_OP_SCSICMD buffer. 5554 */ 5555 static vd_scsi_t * 5556 vdc_scsi_alloc_persistent_out(uchar_t cmd, int len, int *vd_scsi_len) 5557 { 5558 int cdb_len, sense_len, datain_len, dataout_len; 5559 vd_scsi_t *vd_scsi; 5560 union scsi_cdb *cdb; 5561 5562 cdb_len = CDB_GROUP1; 5563 sense_len = sizeof (struct scsi_extended_sense); 5564 datain_len = 0; 5565 dataout_len = len; 5566 5567 vd_scsi = vdc_scsi_alloc(cdb_len, sense_len, datain_len, dataout_len, 5568 vd_scsi_len); 5569 5570 cdb = VD_SCSI_DATA_CDB(vd_scsi); 5571 5572 /* set cdb */ 5573 cdb->scc_cmd = SCMD_PERSISTENT_RESERVE_OUT; 5574 cdb->cdb_opaque[1] = cmd; 5575 FORMG1COUNT(cdb, dataout_len); 5576 5577 vd_scsi->timeout = vdc_scsi_timeout; 5578 5579 return (vd_scsi); 5580 } 5581 5582 /* 5583 * Implement the MHIOCGRP_INKEYS mhd(7i) ioctl. The ioctl is converted 5584 * to a SCSI PERSISTENT IN READ KEYS command which is sent to the vdisk 5585 * server with a VD_OP_SCSICMD operation. 
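 * If the device reports more keys than the caller made room for,
 * the key list copied back out is truncated to the caller's
 * listsize.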
5586 */ 5587 static int 5588 vdc_mhd_inkeys(vdc_t *vdc, caddr_t arg, int mode) 5589 { 5590 vd_scsi_t *vd_scsi; 5591 mhioc_inkeys_t inkeys; 5592 mhioc_key_list_t klist; 5593 struct mhioc_inkeys32 inkeys32; 5594 struct mhioc_key_list32 klist32; 5595 sd_prin_readkeys_t *scsi_keys; 5596 void *user_keys; 5597 int vd_scsi_len; 5598 int listsize, listlen, rv; 5599 5600 /* copyin arguments */ 5601 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 5602 rv = ddi_copyin(arg, &inkeys32, sizeof (inkeys32), mode); 5603 if (rv != 0) 5604 return (EFAULT); 5605 5606 rv = ddi_copyin((caddr_t)(uintptr_t)inkeys32.li, &klist32, 5607 sizeof (klist32), mode); 5608 if (rv != 0) 5609 return (EFAULT); 5610 5611 listsize = klist32.listsize; 5612 } else { 5613 rv = ddi_copyin(arg, &inkeys, sizeof (inkeys), mode); 5614 if (rv != 0) 5615 return (EFAULT); 5616 5617 rv = ddi_copyin(inkeys.li, &klist, sizeof (klist), mode); 5618 if (rv != 0) 5619 return (EFAULT); 5620 5621 listsize = klist.listsize; 5622 } 5623 5624 /* build SCSI VD_OP request */ 5625 vd_scsi = vdc_scsi_alloc_persistent_in(SD_READ_KEYS, 5626 sizeof (sd_prin_readkeys_t) - sizeof (caddr_t) + 5627 (sizeof (mhioc_resv_key_t) * listsize), &vd_scsi_len); 5628 5629 scsi_keys = (sd_prin_readkeys_t *)VD_SCSI_DATA_IN(vd_scsi); 5630 5631 /* submit the request */ 5632 rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 5633 0, 0, CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir, B_FALSE); 5634 5635 if (rv != 0) 5636 goto done; 5637 5638 listlen = scsi_keys->len / MHIOC_RESV_KEY_SIZE; 5639 5640 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 5641 inkeys32.generation = scsi_keys->generation; 5642 rv = ddi_copyout(&inkeys32, arg, sizeof (inkeys32), mode); 5643 if (rv != 0) { 5644 rv = EFAULT; 5645 goto done; 5646 } 5647 5648 klist32.listlen = listlen; 5649 rv = ddi_copyout(&klist32, (caddr_t)(uintptr_t)inkeys32.li, 5650 sizeof (klist32), mode); 5651 if (rv != 0) { 5652 rv = EFAULT; 5653 goto done; 5654 } 5655 5656 user_keys = (caddr_t)(uintptr_t)klist32.list; 5657 } else { 5658 inkeys.generation = scsi_keys->generation; 5659 rv = ddi_copyout(&inkeys, arg, sizeof (inkeys), mode); 5660 if (rv != 0) { 5661 rv = EFAULT; 5662 goto done; 5663 } 5664 5665 klist.listlen = listlen; 5666 rv = ddi_copyout(&klist, inkeys.li, sizeof (klist), mode); 5667 if (rv != 0) { 5668 rv = EFAULT; 5669 goto done; 5670 } 5671 5672 user_keys = klist.list; 5673 } 5674 5675 /* copy out keys */ 5676 if (listlen > 0 && listsize > 0) { 5677 if (listsize < listlen) 5678 listlen = listsize; 5679 rv = ddi_copyout(&scsi_keys->keylist, user_keys, 5680 listlen * MHIOC_RESV_KEY_SIZE, mode); 5681 if (rv != 0) 5682 rv = EFAULT; 5683 } 5684 5685 if (rv == 0) 5686 rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 5687 5688 done: 5689 kmem_free(vd_scsi, vd_scsi_len); 5690 5691 return (rv); 5692 } 5693 5694 /* 5695 * Implement the MHIOCGRP_INRESV mhd(7i) ioctl. The ioctl is converted 5696 * to a SCSI PERSISTENT IN READ RESERVATION command which is sent to 5697 * the vdisk server with a VD_OP_SCSICMD operation. 
5698 */ 5699 static int 5700 vdc_mhd_inresv(vdc_t *vdc, caddr_t arg, int mode) 5701 { 5702 vd_scsi_t *vd_scsi; 5703 mhioc_inresvs_t inresv; 5704 mhioc_resv_desc_list_t rlist; 5705 struct mhioc_inresvs32 inresv32; 5706 struct mhioc_resv_desc_list32 rlist32; 5707 mhioc_resv_desc_t mhd_resv; 5708 sd_prin_readresv_t *scsi_resv; 5709 sd_readresv_desc_t *resv; 5710 mhioc_resv_desc_t *user_resv; 5711 int vd_scsi_len; 5712 int listsize, listlen, i, rv; 5713 5714 /* copyin arguments */ 5715 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 5716 rv = ddi_copyin(arg, &inresv32, sizeof (inresv32), mode); 5717 if (rv != 0) 5718 return (EFAULT); 5719 5720 rv = ddi_copyin((caddr_t)(uintptr_t)inresv32.li, &rlist32, 5721 sizeof (rlist32), mode); 5722 if (rv != 0) 5723 return (EFAULT); 5724 5725 listsize = rlist32.listsize; 5726 } else { 5727 rv = ddi_copyin(arg, &inresv, sizeof (inresv), mode); 5728 if (rv != 0) 5729 return (EFAULT); 5730 5731 rv = ddi_copyin(inresv.li, &rlist, sizeof (rlist), mode); 5732 if (rv != 0) 5733 return (EFAULT); 5734 5735 listsize = rlist.listsize; 5736 } 5737 5738 /* build SCSI VD_OP request */ 5739 vd_scsi = vdc_scsi_alloc_persistent_in(SD_READ_RESV, 5740 sizeof (sd_prin_readresv_t) - sizeof (caddr_t) + 5741 (SCSI3_RESV_DESC_LEN * listsize), &vd_scsi_len); 5742 5743 scsi_resv = (sd_prin_readresv_t *)VD_SCSI_DATA_IN(vd_scsi); 5744 5745 /* submit the request */ 5746 rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 5747 0, 0, CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir, B_FALSE); 5748 5749 if (rv != 0) 5750 goto done; 5751 5752 listlen = scsi_resv->len / SCSI3_RESV_DESC_LEN; 5753 5754 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 5755 inresv32.generation = scsi_resv->generation; 5756 rv = ddi_copyout(&inresv32, arg, sizeof (inresv32), mode); 5757 if (rv != 0) { 5758 rv = EFAULT; 5759 goto done; 5760 } 5761 5762 rlist32.listlen = listlen; 5763 rv = ddi_copyout(&rlist32, (caddr_t)(uintptr_t)inresv32.li, 5764 sizeof (rlist32), mode); 5765 if (rv != 0) { 5766 rv = EFAULT; 5767 goto done; 5768 } 5769 5770 user_resv = (mhioc_resv_desc_t *)(uintptr_t)rlist32.list; 5771 } else { 5772 inresv.generation = scsi_resv->generation; 5773 rv = ddi_copyout(&inresv, arg, sizeof (inresv), mode); 5774 if (rv != 0) { 5775 rv = EFAULT; 5776 goto done; 5777 } 5778 5779 rlist.listlen = listlen; 5780 rv = ddi_copyout(&rlist, inresv.li, sizeof (rlist), mode); 5781 if (rv != 0) { 5782 rv = EFAULT; 5783 goto done; 5784 } 5785 5786 user_resv = rlist.list; 5787 } 5788 5789 /* copy out reservations */ 5790 if (listsize > 0 && listlen > 0) { 5791 if (listsize < listlen) 5792 listlen = listsize; 5793 resv = (sd_readresv_desc_t *)&scsi_resv->readresv_desc; 5794 5795 for (i = 0; i < listlen; i++) { 5796 mhd_resv.type = resv->type; 5797 mhd_resv.scope = resv->scope; 5798 mhd_resv.scope_specific_addr = 5799 BE_32(resv->scope_specific_addr); 5800 bcopy(&resv->resvkey, &mhd_resv.key, 5801 MHIOC_RESV_KEY_SIZE); 5802 5803 rv = ddi_copyout(&mhd_resv, user_resv, 5804 sizeof (mhd_resv), mode); 5805 if (rv != 0) { 5806 rv = EFAULT; 5807 goto done; 5808 } 5809 resv++; 5810 user_resv++; 5811 } 5812 } 5813 5814 if (rv == 0) 5815 rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 5816 5817 done: 5818 kmem_free(vd_scsi, vd_scsi_len); 5819 return (rv); 5820 } 5821 5822 /* 5823 * Implement the MHIOCGRP_REGISTER mhd(7i) ioctl. The ioctl is converted 5824 * to a SCSI PERSISTENT OUT REGISTER command which is sent to the vdisk 5825 * server with a VD_OP_SCSICMD operation. 
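 * The old and new keys from the mhioc_register_t are placed in the
 * SCSI parameter list as the reservation key and service action
 * key respectively, along with the APTPL flag.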
5826 */ 5827 static int 5828 vdc_mhd_register(vdc_t *vdc, caddr_t arg, int mode) 5829 { 5830 vd_scsi_t *vd_scsi; 5831 sd_prout_t *scsi_prout; 5832 mhioc_register_t mhd_reg; 5833 int vd_scsi_len, rv; 5834 5835 /* copyin arguments */ 5836 rv = ddi_copyin(arg, &mhd_reg, sizeof (mhd_reg), mode); 5837 if (rv != 0) 5838 return (EFAULT); 5839 5840 /* build SCSI VD_OP request */ 5841 vd_scsi = vdc_scsi_alloc_persistent_out(SD_SCSI3_REGISTER, 5842 sizeof (sd_prout_t), &vd_scsi_len); 5843 5844 /* set parameters */ 5845 scsi_prout = (sd_prout_t *)VD_SCSI_DATA_OUT(vd_scsi); 5846 bcopy(mhd_reg.oldkey.key, scsi_prout->res_key, MHIOC_RESV_KEY_SIZE); 5847 bcopy(mhd_reg.newkey.key, scsi_prout->service_key, MHIOC_RESV_KEY_SIZE); 5848 scsi_prout->aptpl = (uchar_t)mhd_reg.aptpl; 5849 5850 /* submit the request */ 5851 rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 5852 0, 0, CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir, B_FALSE); 5853 5854 if (rv == 0) 5855 rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 5856 5857 kmem_free(vd_scsi, vd_scsi_len); 5858 return (rv); 5859 } 5860 5861 /* 5862 * Implement the MHIOCGRP_RESERVE mhd(7i) ioctl. The ioctl is converted 5863 * to a SCSI PERSISTENT OUT RESERVE command which is sent to the vdisk 5864 * server with a VD_OP_SCSICMD operation. 5865 */ 5866 static int 5867 vdc_mhd_reserve(vdc_t *vdc, caddr_t arg, int mode) 5868 { 5869 union scsi_cdb *cdb; 5870 vd_scsi_t *vd_scsi; 5871 sd_prout_t *scsi_prout; 5872 mhioc_resv_desc_t mhd_resv; 5873 int vd_scsi_len, rv; 5874 5875 /* copyin arguments */ 5876 rv = ddi_copyin(arg, &mhd_resv, sizeof (mhd_resv), mode); 5877 if (rv != 0) 5878 return (EFAULT); 5879 5880 /* build SCSI VD_OP request */ 5881 vd_scsi = vdc_scsi_alloc_persistent_out(SD_SCSI3_RESERVE, 5882 sizeof (sd_prout_t), &vd_scsi_len); 5883 5884 /* set parameters */ 5885 cdb = VD_SCSI_DATA_CDB(vd_scsi); 5886 scsi_prout = (sd_prout_t *)VD_SCSI_DATA_OUT(vd_scsi); 5887 bcopy(mhd_resv.key.key, scsi_prout->res_key, MHIOC_RESV_KEY_SIZE); 5888 scsi_prout->scope_address = mhd_resv.scope_specific_addr; 5889 cdb->cdb_opaque[2] = mhd_resv.type; 5890 5891 /* submit the request */ 5892 rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 5893 0, 0, CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir, B_FALSE); 5894 5895 if (rv == 0) 5896 rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 5897 5898 kmem_free(vd_scsi, vd_scsi_len); 5899 return (rv); 5900 } 5901 5902 /* 5903 * Implement the MHIOCGRP_PREEMPTANDABORT mhd(7i) ioctl. The ioctl is 5904 * converted to a SCSI PERSISTENT OUT PREEMPT AND ABORT command which 5905 * is sent to the vdisk server with a VD_OP_SCSICMD operation. 
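 * The request is issued with the ACA task attribute and carries
 * both the caller's reservation key and the victim key to preempt.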
5906 */ 5907 static int 5908 vdc_mhd_preemptabort(vdc_t *vdc, caddr_t arg, int mode) 5909 { 5910 union scsi_cdb *cdb; 5911 vd_scsi_t *vd_scsi; 5912 sd_prout_t *scsi_prout; 5913 mhioc_preemptandabort_t mhd_preempt; 5914 int vd_scsi_len, rv; 5915 5916 /* copyin arguments */ 5917 rv = ddi_copyin(arg, &mhd_preempt, sizeof (mhd_preempt), mode); 5918 if (rv != 0) 5919 return (EFAULT); 5920 5921 /* build SCSI VD_OP request */ 5922 vd_scsi = vdc_scsi_alloc_persistent_out(SD_SCSI3_PREEMPTANDABORT, 5923 sizeof (sd_prout_t), &vd_scsi_len); 5924 5925 /* set parameters */ 5926 vd_scsi->task_attribute = VD_SCSI_TASK_ACA; 5927 cdb = VD_SCSI_DATA_CDB(vd_scsi); 5928 scsi_prout = (sd_prout_t *)VD_SCSI_DATA_OUT(vd_scsi); 5929 bcopy(mhd_preempt.resvdesc.key.key, scsi_prout->res_key, 5930 MHIOC_RESV_KEY_SIZE); 5931 bcopy(mhd_preempt.victim_key.key, scsi_prout->service_key, 5932 MHIOC_RESV_KEY_SIZE); 5933 scsi_prout->scope_address = mhd_preempt.resvdesc.scope_specific_addr; 5934 cdb->cdb_opaque[2] = mhd_preempt.resvdesc.type; 5935 5936 /* submit the request */ 5937 rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 5938 0, 0, CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir, B_FALSE); 5939 5940 if (rv == 0) 5941 rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 5942 5943 kmem_free(vd_scsi, vd_scsi_len); 5944 return (rv); 5945 } 5946 5947 /* 5948 * Implement the MHIOCGRP_REGISTERANDIGNOREKEY mhd(7i) ioctl. The ioctl 5949 * is converted to a SCSI PERSISTENT OUT REGISTER AND IGNORE EXISTING KEY 5950 * command which is sent to the vdisk server with a VD_OP_SCSICMD operation. 5951 */ 5952 static int 5953 vdc_mhd_registerignore(vdc_t *vdc, caddr_t arg, int mode) 5954 { 5955 vd_scsi_t *vd_scsi; 5956 sd_prout_t *scsi_prout; 5957 mhioc_registerandignorekey_t mhd_regi; 5958 int vd_scsi_len, rv; 5959 5960 /* copyin arguments */ 5961 rv = ddi_copyin(arg, &mhd_regi, sizeof (mhd_regi), mode); 5962 if (rv != 0) 5963 return (EFAULT); 5964 5965 /* build SCSI VD_OP request */ 5966 vd_scsi = vdc_scsi_alloc_persistent_out(SD_SCSI3_REGISTERANDIGNOREKEY, 5967 sizeof (sd_prout_t), &vd_scsi_len); 5968 5969 /* set parameters */ 5970 scsi_prout = (sd_prout_t *)VD_SCSI_DATA_OUT(vd_scsi); 5971 bcopy(mhd_regi.newkey.key, scsi_prout->service_key, 5972 MHIOC_RESV_KEY_SIZE); 5973 scsi_prout->aptpl = (uchar_t)mhd_regi.aptpl; 5974 5975 /* submit the request */ 5976 rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len, 5977 0, 0, CB_SYNC, (void *)(uint64_t)mode, VIO_both_dir, B_FALSE); 5978 5979 if (rv == 0) 5980 rv = vdc_scsi_status(vdc, vd_scsi, B_FALSE); 5981 5982 kmem_free(vd_scsi, vd_scsi_len); 5983 return (rv); 5984 } 5985 5986 /* 5987 * This function is used by the failfast mechanism to send a SCSI command 5988 * to check for reservation conflict. 5989 */ 5990 static int 5991 vdc_failfast_scsi_cmd(vdc_t *vdc, uchar_t scmd) 5992 { 5993 int cdb_len, sense_len, vd_scsi_len; 5994 vd_scsi_t *vd_scsi; 5995 union scsi_cdb *cdb; 5996 int rv; 5997 5998 ASSERT(scmd == SCMD_TEST_UNIT_READY || scmd == SCMD_WRITE_G1); 5999 6000 if (scmd == SCMD_WRITE_G1) 6001 cdb_len = CDB_GROUP1; 6002 else 6003 cdb_len = CDB_GROUP0; 6004 6005 sense_len = sizeof (struct scsi_extended_sense); 6006 6007 vd_scsi = vdc_scsi_alloc(cdb_len, sense_len, 0, 0, &vd_scsi_len); 6008 6009 /* set cdb */ 6010 cdb = VD_SCSI_DATA_CDB(vd_scsi); 6011 cdb->scc_cmd = scmd; 6012 6013 vd_scsi->timeout = vdc_scsi_timeout; 6014 6015 /* 6016 * Submit the request. 
The last argument has to be B_FALSE so that
6017 * vdc_do_sync_op does not loop checking for a reservation conflict if
6018 * the operation returns an error.
6019 */
6020 rv = vdc_do_sync_op(vdc, VD_OP_SCSICMD, (caddr_t)vd_scsi, vd_scsi_len,
6021 0, 0, CB_SYNC, (void *)(uint64_t)FKIOCTL, VIO_both_dir, B_FALSE);
6022
6023 if (rv == 0)
6024 (void) vdc_scsi_status(vdc, vd_scsi, B_FALSE);
6025
6026 kmem_free(vd_scsi, vd_scsi_len);
6027 return (rv);
6028 }
6029
6030 /*
6031 * This function is used by the failfast mechanism to check for reservation
6032 * conflict. It sends some SCSI commands which will fail with a reservation
6033 * conflict error if the system does not have access to the disk, and this
6034 * will panic the system.
6035 *
6036 * Returned Code:
6037 * 0 - disk is accessible without reservation conflict error
6038 * != 0 - unable to check if disk is accessible
6039 */
6040 int
6041 vdc_failfast_check_resv(vdc_t *vdc)
6042 {
6043 int failure = 0;
6044
6045 /*
6046 * Send a TEST UNIT READY command. The command will panic
6047 * the system if it fails with a reservation conflict.
6048 */
6049 if (vdc_failfast_scsi_cmd(vdc, SCMD_TEST_UNIT_READY) != 0)
6050 failure++;
6051
6052 /*
6053 * With SPC-3 compliant devices TEST UNIT READY will succeed on
6054 * a reserved device, so we also do a WRITE(10) of zero bytes in
6055 * order to provoke a Reservation Conflict status on those newer
6056 * devices.
6057 */
6058 if (vdc_failfast_scsi_cmd(vdc, SCMD_WRITE_G1) != 0)
6059 failure++;
6060
6061 return (failure);
6062 }
6063
6064 /*
6065 * Add a pending I/O to the failfast I/O queue. An I/O is added to this
6066 * queue when it has failed and failfast is enabled. Then we have to check
6067 * if it has failed because of a reservation conflict, in which case we have
6068 * to panic the system.
6069 *
6070 * Async I/O should be queued with their block I/O data transfer structure
6071 * (buf). Sync I/O should be queued with buf = NULL.
6072 */
6073 static vdc_io_t *
6074 vdc_failfast_io_queue(vdc_t *vdc, struct buf *buf)
6075 {
6076 vdc_io_t *vio;
6077
6078 ASSERT(MUTEX_HELD(&vdc->lock));
6079
6080 vio = kmem_alloc(sizeof (vdc_io_t), KM_SLEEP);
6081 vio->vio_next = vdc->failfast_io_queue;
6082 vio->vio_buf = buf;
6083 vio->vio_qtime = ddi_get_lbolt();
6084
6085 vdc->failfast_io_queue = vio;
6086
6087 /* notify the failfast thread that a new I/O is queued */
6088 cv_signal(&vdc->failfast_cv);
6089
6090 return (vio);
6091 }
6092
6093 /*
6094 * Remove and complete I/Os in the failfast I/O queue which were
6095 * added before the indicated deadline. A deadline of 0 means that all
6096 * I/Os have to be unqueued and marked as completed.
6097 */
6098 static void
6099 vdc_failfast_io_unqueue(vdc_t *vdc, clock_t deadline)
6100 {
6101 vdc_io_t *vio, *vio_tmp;
6102
6103 ASSERT(MUTEX_HELD(&vdc->lock));
6104
6105 vio_tmp = NULL;
6106 vio = vdc->failfast_io_queue;
6107
6108 if (deadline != 0) {
6109 /*
6110 * Skip any I/O queued after the deadline. The failfast
6111 * I/O queue is ordered starting with the last I/O added
6112 * to the queue.
6113 */
6114 while (vio != NULL && vio->vio_qtime > deadline) {
6115 vio_tmp = vio;
6116 vio = vio->vio_next;
6117 }
6118 }
6119
6120 if (vio == NULL)
6121 /* nothing to unqueue */
6122 return;
6123
6124 /* update the queue */
6125 if (vio_tmp == NULL)
6126 vdc->failfast_io_queue = NULL;
6127 else
6128 vio_tmp->vio_next = NULL;
6129
6130 /*
6131 * Complete unqueued I/O.
Async I/O have a block I/O data transfer 6132 * structure (buf) and they are completed by calling biodone(). Sync 6133 * I/O do not have a buf and they are completed by setting the 6134 * vio_qtime to zero and signaling failfast_io_cv. In that case, the 6135 * thread waiting for the I/O to complete is responsible for freeing 6136 * the vio structure. 6137 */ 6138 while (vio != NULL) { 6139 vio_tmp = vio->vio_next; 6140 if (vio->vio_buf != NULL) { 6141 VD_KSTAT_RUNQ_EXIT(vdc); 6142 DTRACE_IO1(done, buf_t *, vio->vio_buf); 6143 biodone(vio->vio_buf); 6144 kmem_free(vio, sizeof (vdc_io_t)); 6145 } else { 6146 vio->vio_qtime = 0; 6147 } 6148 vio = vio_tmp; 6149 } 6150 6151 cv_broadcast(&vdc->failfast_io_cv); 6152 } 6153 6154 /* 6155 * Failfast Thread. 6156 * 6157 * While failfast is enabled, the failfast thread sends TEST UNIT READY 6158 * and zero-size WRITE(10) SCSI commands on a regular basis to check that 6159 * we still have access to the disk. If a command fails with a RESERVATION 6160 * CONFLICT error then the system will immediately panic. 6161 * 6162 * The failfast thread is also woken up when an I/O has failed. It then checks 6163 * the access to the disk to ensure that the I/O failure was not due to a 6164 * reservation conflict. 6165 * 6166 * There is one failfast thread for each virtual disk for which failfast is 6167 * enabled. We could have only one thread sending requests for all disks but 6168 * this would require vdc to send asynchronous requests and to have callbacks 6169 * to process replies. 6170 */ 6171 static void 6172 vdc_failfast_thread(void *arg) 6173 { 6174 int status; 6175 vdc_t *vdc = (vdc_t *)arg; 6176 clock_t timeout, starttime; 6177 6178 mutex_enter(&vdc->lock); 6179 6180 while (vdc->failfast_interval != 0) { 6181 6182 starttime = ddi_get_lbolt(); 6183 6184 mutex_exit(&vdc->lock); 6185 6186 /* check for reservation conflict */ 6187 status = vdc_failfast_check_resv(vdc); 6188 6189 mutex_enter(&vdc->lock); 6190 /* 6191 * We have dropped the lock to send the SCSI command so we have 6192 * to check that failfast is still enabled. 6193 */ 6194 if (vdc->failfast_interval == 0) 6195 break; 6196 6197 /* 6198 * If we have successfully checked the disk access and there was 6199 * no reservation conflict then we can complete any I/O queued 6200 * before the last check. 6201 */ 6202 if (status == 0) 6203 vdc_failfast_io_unqueue(vdc, starttime); 6204 6205 /* proceed again if some I/O is still in the queue */ 6206 if (vdc->failfast_io_queue != NULL) 6207 continue; 6208 6209 timeout = ddi_get_lbolt() + 6210 drv_usectohz(vdc->failfast_interval); 6211 (void) cv_timedwait(&vdc->failfast_cv, &vdc->lock, timeout); 6212 } 6213 6214 /* 6215 * Failfast is being stopped so we can complete any queued I/O. 6216 */ 6217 vdc_failfast_io_unqueue(vdc, 0); 6218 vdc->failfast_thread = NULL; 6219 mutex_exit(&vdc->lock); 6220 thread_exit(); 6221 } 6222 6223 /* 6224 * Implement the MHIOCENFAILFAST mhd(7i) ioctl.
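 *
 * The mh_time argument is given in milliseconds; a value of 0 disables
 * failfast. For example, assuming fd is a file descriptor open on the
 * vdc device, a multihost-aware application could enable a one second
 * probe interval with something like:
 *
 *	unsigned int mh_time = 1000;
 *	(void) ioctl(fd, MHIOCENFAILFAST, &mh_time);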
6225 */ 6226 static int 6227 vdc_failfast(vdc_t *vdc, caddr_t arg, int mode) 6228 { 6229 unsigned int mh_time; 6230 6231 if (ddi_copyin((void *)arg, &mh_time, sizeof (int), mode)) 6232 return (EFAULT); 6233 6234 mutex_enter(&vdc->lock); 6235 if (mh_time != 0 && vdc->failfast_thread == NULL) { 6236 vdc->failfast_thread = thread_create(NULL, 0, 6237 vdc_failfast_thread, vdc, 0, &p0, TS_RUN, 6238 v.v_maxsyspri - 2); 6239 } 6240 6241 vdc->failfast_interval = mh_time * 1000; 6242 cv_signal(&vdc->failfast_cv); 6243 mutex_exit(&vdc->lock); 6244 6245 return (0); 6246 } 6247 6248 /* 6249 * Implement the MHIOCTKOWN and MHIOCRELEASE mhd(7i) ioctls. These ioctls are 6250 * converted to VD_OP_SET_ACCESS operations. 6251 */ 6252 static int 6253 vdc_access_set(vdc_t *vdc, uint64_t flags, int mode) 6254 { 6255 int rv; 6256 6257 /* submit ownership command request */ 6258 rv = vdc_do_sync_op(vdc, VD_OP_SET_ACCESS, (caddr_t)&flags, 6259 sizeof (uint64_t), 0, 0, CB_SYNC, (void *)(uint64_t)mode, 6260 VIO_both_dir, B_TRUE); 6261 6262 return (rv); 6263 } 6264 6265 /* 6266 * Implement the MHIOCSTATUS mhd(7i) ioctl. This ioctl is converted to a 6267 * VD_OP_GET_ACCESS operation. 6268 */ 6269 static int 6270 vdc_access_get(vdc_t *vdc, uint64_t *status, int mode) 6271 { 6272 int rv; 6273 6274 /* submit ownership command request */ 6275 rv = vdc_do_sync_op(vdc, VD_OP_GET_ACCESS, (caddr_t)status, 6276 sizeof (uint64_t), 0, 0, CB_SYNC, (void *)(uint64_t)mode, 6277 VIO_both_dir, B_TRUE); 6278 6279 return (rv); 6280 } 6281 6282 /* 6283 * Disk Ownership Thread. 6284 * 6285 * When we have taken the ownership of a disk, this thread waits to be 6286 * notified when the LDC channel is reset so that it can recover the 6287 * ownership. 6288 * 6289 * Note that the thread handling the LDC reset (vdc_process_msg_thread()) 6290 * cannot be used to do the ownership recovery because it has to be 6291 * running to handle the reply message to the ownership operation. 6292 */ 6293 static void 6294 vdc_ownership_thread(void *arg) 6295 { 6296 vdc_t *vdc = (vdc_t *)arg; 6297 clock_t timeout; 6298 uint64_t status; 6299 6300 mutex_enter(&vdc->ownership_lock); 6301 mutex_enter(&vdc->lock); 6302 6303 while (vdc->ownership & VDC_OWNERSHIP_WANTED) { 6304 6305 if ((vdc->ownership & VDC_OWNERSHIP_RESET) || 6306 !(vdc->ownership & VDC_OWNERSHIP_GRANTED)) { 6307 /* 6308 * There was a reset so the ownership has been lost, 6309 * try to recover. We do this without using the preempt 6310 * option so that we don't steal the ownership from 6311 * someone who has preempted us. 6312 */ 6313 DMSG(vdc, 0, "[%d] Ownership lost, recovering", 6314 vdc->instance); 6315 6316 vdc->ownership &= ~(VDC_OWNERSHIP_RESET | 6317 VDC_OWNERSHIP_GRANTED); 6318 6319 mutex_exit(&vdc->lock); 6320 6321 status = vdc_access_set(vdc, VD_ACCESS_SET_EXCLUSIVE | 6322 VD_ACCESS_SET_PRESERVE, FKIOCTL); 6323 6324 mutex_enter(&vdc->lock); 6325 6326 if (status == 0) { 6327 DMSG(vdc, 0, "[%d] Ownership recovered", 6328 vdc->instance); 6329 vdc->ownership |= VDC_OWNERSHIP_GRANTED; 6330 } else { 6331 DMSG(vdc, 0, "[%d] Failed to recover ownership", 6332 vdc->instance); 6333 } 6334 6335 } 6336 6337 /* 6338 * If we have the ownership then we just wait for an event 6339 * to happen (LDC reset), otherwise we will retry to recover 6340 * after a delay.
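 * The retry delay is given by vdc_ownership_delay (in microseconds).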
6341 */ 6342 if (vdc->ownership & VDC_OWNERSHIP_GRANTED) 6343 timeout = 0; 6344 else 6345 timeout = ddi_get_lbolt() + 6346 drv_usectohz(vdc_ownership_delay); 6347 6348 /* Release the ownership_lock and wait on the vdc lock */ 6349 mutex_exit(&vdc->ownership_lock); 6350 6351 if (timeout == 0) 6352 (void) cv_wait(&vdc->ownership_cv, &vdc->lock); 6353 else 6354 (void) cv_timedwait(&vdc->ownership_cv, 6355 &vdc->lock, timeout); 6356 6357 mutex_exit(&vdc->lock); 6358 6359 mutex_enter(&vdc->ownership_lock); 6360 mutex_enter(&vdc->lock); 6361 } 6362 6363 vdc->ownership_thread = NULL; 6364 mutex_exit(&vdc->lock); 6365 mutex_exit(&vdc->ownership_lock); 6366 6367 thread_exit(); 6368 } 6369 6370 static void 6371 vdc_ownership_update(vdc_t *vdc, int ownership_flags) 6372 { 6373 ASSERT(MUTEX_HELD(&vdc->ownership_lock)); 6374 6375 mutex_enter(&vdc->lock); 6376 vdc->ownership = ownership_flags; 6377 if ((vdc->ownership & VDC_OWNERSHIP_WANTED) && 6378 vdc->ownership_thread == NULL) { 6379 /* start ownership thread */ 6380 vdc->ownership_thread = thread_create(NULL, 0, 6381 vdc_ownership_thread, vdc, 0, &p0, TS_RUN, 6382 v.v_maxsyspri - 2); 6383 } else { 6384 /* notify the ownership thread */ 6385 cv_signal(&vdc->ownership_cv); 6386 } 6387 mutex_exit(&vdc->lock); 6388 } 6389 6390 /* 6391 * Get the size and the block size of a virtual disk from the vdisk server. 6392 * We need to use this operation when the vdisk_size attribute was not 6393 * available during the handshake with the vdisk server. 6394 */ 6395 static int 6396 vdc_check_capacity(vdc_t *vdc) 6397 { 6398 int rv = 0; 6399 size_t alloc_len; 6400 vd_capacity_t *vd_cap; 6401 6402 if (vdc->vdisk_size != 0) 6403 return (0); 6404 6405 alloc_len = P2ROUNDUP(sizeof (vd_capacity_t), sizeof (uint64_t)); 6406 6407 vd_cap = kmem_zalloc(alloc_len, KM_SLEEP); 6408 6409 rv = vdc_do_sync_op(vdc, VD_OP_GET_CAPACITY, (caddr_t)vd_cap, alloc_len, 6410 0, 0, CB_SYNC, (void *)(uint64_t)FKIOCTL, VIO_both_dir, B_TRUE); 6411 6412 if (rv == 0) { 6413 if (vd_cap->vdisk_block_size != vdc->block_size || 6414 vd_cap->vdisk_size == VD_SIZE_UNKNOWN || 6415 vd_cap->vdisk_size == 0) 6416 rv = EINVAL; 6417 else 6418 vdc->vdisk_size = vd_cap->vdisk_size; 6419 } 6420 6421 kmem_free(vd_cap, alloc_len); 6422 return (rv); 6423 } 6424 6425 /* 6426 * This structure is used in the DKIO(7I) array below. 
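 *
 * Each entry maps a Solaris ioctl to the vDisk operation used to
 * implement it, together with the size of the structure exchanged with
 * the server and a callback which converts between the Solaris and
 * vDisk formats. Entries with an op of 0 are not directly translated
 * into a VD_OP request; they are either emulated locally by vdc or
 * handled by a dedicated routine (see vd_process_ioctl() below).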
6427 */ 6428 typedef struct vdc_dk_ioctl { 6429 uint8_t op; /* VD_OP_XXX value */ 6430 int cmd; /* Solaris ioctl operation number */ 6431 size_t nbytes; /* size of structure to be copied */ 6432 6433 /* function to convert between vDisk and Solaris structure formats */ 6434 int (*convert)(vdc_t *vdc, void *vd_buf, void *ioctl_arg, 6435 int mode, int dir); 6436 } vdc_dk_ioctl_t; 6437 6438 /* 6439 * Subset of DKIO(7I) operations currently supported 6440 */ 6441 static vdc_dk_ioctl_t dk_ioctl[] = { 6442 {VD_OP_FLUSH, DKIOCFLUSHWRITECACHE, 0, 6443 vdc_null_copy_func}, 6444 {VD_OP_GET_WCE, DKIOCGETWCE, sizeof (int), 6445 vdc_get_wce_convert}, 6446 {VD_OP_SET_WCE, DKIOCSETWCE, sizeof (int), 6447 vdc_set_wce_convert}, 6448 {VD_OP_GET_VTOC, DKIOCGVTOC, sizeof (vd_vtoc_t), 6449 vdc_get_vtoc_convert}, 6450 {VD_OP_SET_VTOC, DKIOCSVTOC, sizeof (vd_vtoc_t), 6451 vdc_set_vtoc_convert}, 6452 {VD_OP_GET_DISKGEOM, DKIOCGGEOM, sizeof (vd_geom_t), 6453 vdc_get_geom_convert}, 6454 {VD_OP_GET_DISKGEOM, DKIOCG_PHYGEOM, sizeof (vd_geom_t), 6455 vdc_get_geom_convert}, 6456 {VD_OP_GET_DISKGEOM, DKIOCG_VIRTGEOM, sizeof (vd_geom_t), 6457 vdc_get_geom_convert}, 6458 {VD_OP_SET_DISKGEOM, DKIOCSGEOM, sizeof (vd_geom_t), 6459 vdc_set_geom_convert}, 6460 {VD_OP_GET_EFI, DKIOCGETEFI, 0, 6461 vdc_get_efi_convert}, 6462 {VD_OP_SET_EFI, DKIOCSETEFI, 0, 6463 vdc_set_efi_convert}, 6464 6465 /* DIOCTL_RWCMD is converted to a read or a write */ 6466 {0, DIOCTL_RWCMD, sizeof (struct dadkio_rwcmd), NULL}, 6467 6468 /* mhd(7I) non-shared multihost disk ioctls */ 6469 {0, MHIOCTKOWN, 0, vdc_null_copy_func}, 6470 {0, MHIOCRELEASE, 0, vdc_null_copy_func}, 6471 {0, MHIOCSTATUS, 0, vdc_null_copy_func}, 6472 {0, MHIOCQRESERVE, 0, vdc_null_copy_func}, 6473 6474 /* mhd(7I) shared multihost disk ioctls */ 6475 {0, MHIOCGRP_INKEYS, 0, vdc_null_copy_func}, 6476 {0, MHIOCGRP_INRESV, 0, vdc_null_copy_func}, 6477 {0, MHIOCGRP_REGISTER, 0, vdc_null_copy_func}, 6478 {0, MHIOCGRP_RESERVE, 0, vdc_null_copy_func}, 6479 {0, MHIOCGRP_PREEMPTANDABORT, 0, vdc_null_copy_func}, 6480 {0, MHIOCGRP_REGISTERANDIGNOREKEY, 0, vdc_null_copy_func}, 6481 6482 /* mhd(7I) failfast ioctl */ 6483 {0, MHIOCENFAILFAST, 0, vdc_null_copy_func}, 6484 6485 /* 6486 * These particular ioctls are not sent to the server - vdc fakes up 6487 * the necessary info. 6488 */ 6489 {0, DKIOCINFO, sizeof (struct dk_cinfo), vdc_null_copy_func}, 6490 {0, DKIOCGMEDIAINFO, sizeof (struct dk_minfo), vdc_null_copy_func}, 6491 {0, USCSICMD, sizeof (struct uscsi_cmd), vdc_null_copy_func}, 6492 {0, DKIOCPARTITION, 0, vdc_null_copy_func}, 6493 {0, DKIOCGAPART, 0, vdc_null_copy_func}, 6494 {0, DKIOCREMOVABLE, 0, vdc_null_copy_func}, 6495 {0, CDROMREADOFFSET, 0, vdc_null_copy_func} 6496 }; 6497 6498 /* 6499 * This function handles ioctl requests from the vd_efi_alloc_and_read() 6500 * function and forwards them to the vdisk.
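 *
 * It is installed as the ioctl callback of the vd_efi_dev_t that
 * vdc_validate_geometry() sets up with VD_EFI_DEV_SET(), so that the
 * common EFI support code can read the disk GPT and GPE through the
 * vdc device.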
6501 */ 6502 static int 6503 vd_process_efi_ioctl(void *vdisk, int cmd, uintptr_t arg) 6504 { 6505 vdc_t *vdc = (vdc_t *)vdisk; 6506 dev_t dev; 6507 int rval; 6508 6509 dev = makedevice(ddi_driver_major(vdc->dip), 6510 VD_MAKE_DEV(vdc->instance, 0)); 6511 6512 return (vd_process_ioctl(dev, cmd, (caddr_t)arg, FKIOCTL, &rval)); 6513 } 6514 6515 /* 6516 * Function: 6517 * vd_process_ioctl() 6518 * 6519 * Description: 6520 * This routine processes disk specific ioctl calls 6521 * 6522 * Arguments: 6523 * dev - the device number 6524 * cmd - the operation [dkio(7I)] to be processed 6525 * arg - pointer to user provided structure 6526 * (contains data to be set or reference parameter for get) 6527 * mode - bit flag, indicating open settings, 32/64 bit type, etc 6528 * rvalp - pointer to return value for calling process. 6529 * 6530 * Return Code: 6531 * 0 6532 * EFAULT 6533 * ENXIO 6534 * EIO 6535 * ENOTSUP 6536 */ 6537 static int 6538 vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode, int *rvalp) 6539 { 6540 int instance = VDCUNIT(dev); 6541 vdc_t *vdc = NULL; 6542 int rv = -1; 6543 int idx = 0; /* index into dk_ioctl[] */ 6544 size_t len = 0; /* #bytes to send to vds */ 6545 size_t alloc_len = 0; /* #bytes to allocate mem for */ 6546 caddr_t mem_p = NULL; 6547 size_t nioctls = (sizeof (dk_ioctl)) / (sizeof (dk_ioctl[0])); 6548 vdc_dk_ioctl_t *iop; 6549 6550 vdc = ddi_get_soft_state(vdc_state, instance); 6551 if (vdc == NULL) { 6552 cmn_err(CE_NOTE, "![%d] Could not get soft state structure", 6553 instance); 6554 return (ENXIO); 6555 } 6556 6557 DMSG(vdc, 0, "[%d] Processing ioctl(%x) for dev %lx : model %x\n", 6558 instance, cmd, dev, ddi_model_convert_from(mode & FMODELS)); 6559 6560 if (rvalp != NULL) { 6561 /* the return value of the ioctl is 0 by default */ 6562 *rvalp = 0; 6563 } 6564 6565 /* 6566 * Validate the ioctl operation to be performed. 6567 * 6568 * If we have looped through the array without finding a match then we 6569 * don't support this ioctl. 
6570 */ 6571 for (idx = 0; idx < nioctls; idx++) { 6572 if (cmd == dk_ioctl[idx].cmd) 6573 break; 6574 } 6575 6576 if (idx >= nioctls) { 6577 DMSG(vdc, 0, "[%d] Unsupported ioctl (0x%x)\n", 6578 vdc->instance, cmd); 6579 return (ENOTSUP); 6580 } 6581 6582 iop = &(dk_ioctl[idx]); 6583 6584 if (cmd == DKIOCGETEFI || cmd == DKIOCSETEFI) { 6585 /* size is not fixed for EFI ioctls, it depends on ioctl arg */ 6586 dk_efi_t dk_efi; 6587 6588 rv = ddi_copyin(arg, &dk_efi, sizeof (dk_efi_t), mode); 6589 if (rv != 0) 6590 return (EFAULT); 6591 6592 len = sizeof (vd_efi_t) - 1 + dk_efi.dki_length; 6593 } else { 6594 len = iop->nbytes; 6595 } 6596 6597 /* check if the ioctl is applicable */ 6598 switch (cmd) { 6599 case CDROMREADOFFSET: 6600 case DKIOCREMOVABLE: 6601 return (ENOTTY); 6602 6603 case USCSICMD: 6604 case MHIOCTKOWN: 6605 case MHIOCSTATUS: 6606 case MHIOCQRESERVE: 6607 case MHIOCRELEASE: 6608 case MHIOCGRP_INKEYS: 6609 case MHIOCGRP_INRESV: 6610 case MHIOCGRP_REGISTER: 6611 case MHIOCGRP_RESERVE: 6612 case MHIOCGRP_PREEMPTANDABORT: 6613 case MHIOCGRP_REGISTERANDIGNOREKEY: 6614 case MHIOCENFAILFAST: 6615 if (vdc->cinfo == NULL) 6616 return (ENXIO); 6617 if (vdc->cinfo->dki_ctype != DKC_SCSI_CCS) 6618 return (ENOTTY); 6619 break; 6620 6621 case DIOCTL_RWCMD: 6622 if (vdc->cinfo == NULL) 6623 return (ENXIO); 6624 if (vdc->cinfo->dki_ctype != DKC_DIRECT) 6625 return (ENOTTY); 6626 break; 6627 6628 case DKIOCINFO: 6629 if (vdc->cinfo == NULL) 6630 return (ENXIO); 6631 break; 6632 6633 case DKIOCGMEDIAINFO: 6634 if (vdc->minfo == NULL) 6635 return (ENXIO); 6636 if (vdc_check_capacity(vdc) != 0) 6637 /* disk capacity is not available */ 6638 return (EIO); 6639 break; 6640 } 6641 6642 /* 6643 * Deal with ioctls which require processing other than simply 6644 * converting the ioctl arguments and sending a corresponding 6645 * VD operation. 6646 */ 6647 switch (cmd) { 6648 6649 case USCSICMD: 6650 { 6651 return (vdc_uscsi_cmd(vdc, arg, mode)); 6652 } 6653 6654 case MHIOCTKOWN: 6655 { 6656 mutex_enter(&vdc->ownership_lock); 6657 /* 6658 * We have to set VDC_OWNERSHIP_WANTED now so that the ownership 6659 * can be flagged with VDC_OWNERSHIP_RESET if the LDC is reset 6660 * while we are processing the ioctl. 6661 */ 6662 vdc_ownership_update(vdc, VDC_OWNERSHIP_WANTED); 6663 6664 rv = vdc_access_set(vdc, VD_ACCESS_SET_EXCLUSIVE | 6665 VD_ACCESS_SET_PREEMPT | VD_ACCESS_SET_PRESERVE, mode); 6666 if (rv == 0) { 6667 vdc_ownership_update(vdc, VDC_OWNERSHIP_WANTED | 6668 VDC_OWNERSHIP_GRANTED); 6669 } else { 6670 vdc_ownership_update(vdc, VDC_OWNERSHIP_NONE); 6671 } 6672 mutex_exit(&vdc->ownership_lock); 6673 return (rv); 6674 } 6675 6676 case MHIOCRELEASE: 6677 { 6678 mutex_enter(&vdc->ownership_lock); 6679 rv = vdc_access_set(vdc, VD_ACCESS_SET_CLEAR, mode); 6680 if (rv == 0) { 6681 vdc_ownership_update(vdc, VDC_OWNERSHIP_NONE); 6682 } 6683 mutex_exit(&vdc->ownership_lock); 6684 return (rv); 6685 } 6686 6687 case MHIOCSTATUS: 6688 { 6689 uint64_t status; 6690 6691 rv = vdc_access_get(vdc, &status, mode); 6692 if (rv == 0 && rvalp != NULL) 6693 *rvalp = (status & VD_ACCESS_ALLOWED) ?
0 : 1; 6694 return (rv); 6695 } 6696 6697 case MHIOCQRESERVE: 6698 { 6699 rv = vdc_access_set(vdc, VD_ACCESS_SET_EXCLUSIVE, mode); 6700 return (rv); 6701 } 6702 6703 case MHIOCGRP_INKEYS: 6704 { 6705 return (vdc_mhd_inkeys(vdc, arg, mode)); 6706 } 6707 6708 case MHIOCGRP_INRESV: 6709 { 6710 return (vdc_mhd_inresv(vdc, arg, mode)); 6711 } 6712 6713 case MHIOCGRP_REGISTER: 6714 { 6715 return (vdc_mhd_register(vdc, arg, mode)); 6716 } 6717 6718 case MHIOCGRP_RESERVE: 6719 { 6720 return (vdc_mhd_reserve(vdc, arg, mode)); 6721 } 6722 6723 case MHIOCGRP_PREEMPTANDABORT: 6724 { 6725 return (vdc_mhd_preemptabort(vdc, arg, mode)); 6726 } 6727 6728 case MHIOCGRP_REGISTERANDIGNOREKEY: 6729 { 6730 return (vdc_mhd_registerignore(vdc, arg, mode)); 6731 } 6732 6733 case MHIOCENFAILFAST: 6734 { 6735 rv = vdc_failfast(vdc, arg, mode); 6736 return (rv); 6737 } 6738 6739 case DIOCTL_RWCMD: 6740 { 6741 return (vdc_dioctl_rwcmd(dev, arg, mode)); 6742 } 6743 6744 case DKIOCGAPART: 6745 { 6746 return (vdc_dkio_gapart(vdc, arg, mode)); 6747 } 6748 6749 case DKIOCPARTITION: 6750 { 6751 return (vdc_dkio_partition(vdc, arg, mode)); 6752 } 6753 6754 case DKIOCINFO: 6755 { 6756 struct dk_cinfo cinfo; 6757 6758 bcopy(vdc->cinfo, &cinfo, sizeof (struct dk_cinfo)); 6759 cinfo.dki_partition = VDCPART(dev); 6760 6761 rv = ddi_copyout(&cinfo, (void *)arg, 6762 sizeof (struct dk_cinfo), mode); 6763 if (rv != 0) 6764 return (EFAULT); 6765 6766 return (0); 6767 } 6768 6769 case DKIOCGMEDIAINFO: 6770 { 6771 ASSERT(vdc->vdisk_size != 0); 6772 if (vdc->minfo->dki_capacity == 0) 6773 vdc->minfo->dki_capacity = vdc->vdisk_size; 6774 rv = ddi_copyout(vdc->minfo, (void *)arg, 6775 sizeof (struct dk_minfo), mode); 6776 if (rv != 0) 6777 return (EFAULT); 6778 6779 return (0); 6780 } 6781 6782 case DKIOCFLUSHWRITECACHE: 6783 { 6784 struct dk_callback *dkc = 6785 (struct dk_callback *)(uintptr_t)arg; 6786 vdc_dk_arg_t *dkarg = NULL; 6787 6788 DMSG(vdc, 1, "[%d] Flush W$: mode %x\n", 6789 instance, mode); 6790 6791 /* 6792 * If arg is NULL, then there is no callback function 6793 * registered and the call operates synchronously; we 6794 * break and continue with the rest of the function and 6795 * wait for vds to return (i.e. after the request to 6796 * vds returns successfully, all writes completed prior 6797 * to the ioctl will have been flushed from the disk 6798 * write cache to persistent media). 6799 * 6800 * If a callback function is registered, we dispatch 6801 * the request on a task queue and return immediately. 6802 * The callback will deal with informing the calling 6803 * thread that the flush request is completed. 6804 */ 6805 if (dkc == NULL) 6806 break; 6807 6808 /* 6809 * the asynchronous callback is only supported if 6810 * invoked from within the kernel 6811 */ 6812 if ((mode & FKIOCTL) == 0) 6813 return (ENOTSUP); 6814 6815 dkarg = kmem_zalloc(sizeof (vdc_dk_arg_t), KM_SLEEP); 6816 6817 dkarg->mode = mode; 6818 dkarg->dev = dev; 6819 bcopy(dkc, &dkarg->dkc, sizeof (*dkc)); 6820 6821 mutex_enter(&vdc->lock); 6822 vdc->dkio_flush_pending++; 6823 dkarg->vdc = vdc; 6824 mutex_exit(&vdc->lock); 6825 6826 /* put the request on a task queue */ 6827 rv = taskq_dispatch(system_taskq, vdc_dkio_flush_cb, 6828 (void *)dkarg, DDI_SLEEP); 6829 if (rv == NULL) { 6830 /* clean up if dispatch fails */ 6831 mutex_enter(&vdc->lock); 6832 vdc->dkio_flush_pending--; 6833 mutex_exit(&vdc->lock); 6834 kmem_free(dkarg, sizeof (vdc_dk_arg_t)); 6835 } 6836 6837 return (rv == NULL ?
ENOMEM : 0); 6838 } 6839 } 6840 6841 /* catch programming error in vdc - should be a VD_OP_XXX ioctl */ 6842 ASSERT(iop->op != 0); 6843 6844 /* check if the vDisk server handles the operation for this vDisk */ 6845 if (VD_OP_SUPPORTED(vdc->operations, iop->op) == B_FALSE) { 6846 DMSG(vdc, 0, "[%d] Unsupported VD_OP operation (0x%x)\n", 6847 vdc->instance, iop->op); 6848 return (ENOTSUP); 6849 } 6850 6851 /* LDC requires that the memory being mapped is 8-byte aligned */ 6852 alloc_len = P2ROUNDUP(len, sizeof (uint64_t)); 6853 DMSG(vdc, 1, "[%d] struct size %ld alloc %ld\n", 6854 instance, len, alloc_len); 6855 6856 if (alloc_len > 0) 6857 mem_p = kmem_zalloc(alloc_len, KM_SLEEP); 6858 6859 /* 6860 * Call the conversion function for this ioctl which, if necessary, 6861 * converts from the Solaris format to the format ARC'ed 6862 * as part of the vDisk protocol (FWARC 2006/195) 6863 */ 6864 ASSERT(iop->convert != NULL); 6865 rv = (iop->convert)(vdc, arg, mem_p, mode, VD_COPYIN); 6866 if (rv != 0) { 6867 DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n", 6868 instance, rv, cmd); 6869 if (mem_p != NULL) 6870 kmem_free(mem_p, alloc_len); 6871 return (rv); 6872 } 6873 6874 /* 6875 * send request to vds to service the ioctl. 6876 */ 6877 rv = vdc_do_sync_op(vdc, iop->op, mem_p, alloc_len, 6878 VDCPART(dev), 0, CB_SYNC, (void *)(uint64_t)mode, 6879 VIO_both_dir, B_TRUE); 6880 6881 if (rv != 0) { 6882 /* 6883 * This is not necessarily an error. The ioctl could 6884 * be returning a value such as ENOTTY to indicate 6885 * that the ioctl is not applicable. 6886 */ 6887 DMSG(vdc, 0, "[%d] vds returned %d for ioctl 0x%x\n", 6888 instance, rv, cmd); 6889 if (mem_p != NULL) 6890 kmem_free(mem_p, alloc_len); 6891 6892 return (rv); 6893 } 6894 6895 /* 6896 * Call the conversion function (if it exists) for this ioctl 6897 * which converts from the format ARC'ed as part of the vDisk 6898 * protocol (FWARC 2006/195) back to a format understood by 6899 * the rest of Solaris. 
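 * Note that the same convert callback handles both directions: the
 * dir argument (VD_COPYIN or VD_COPYOUT) selects which conversion,
 * if any, is performed.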
6900 */ 6901 rv = (iop->convert)(vdc, mem_p, arg, mode, VD_COPYOUT); 6902 if (rv != 0) { 6903 DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n", 6904 instance, rv, cmd); 6905 if (mem_p != NULL) 6906 kmem_free(mem_p, alloc_len); 6907 return (rv); 6908 } 6909 6910 if (mem_p != NULL) 6911 kmem_free(mem_p, alloc_len); 6912 6913 return (rv); 6914 } 6915 6916 /* 6917 * Function: 6918 * vdc_null_copy_func() 6919 * Description: 6920 * This is an empty conversion function used by ioctl calls which 6921 * do not need to convert the data being passed in/out to userland 6922 */ 6923 static int 6924 vdc_null_copy_func(vdc_t *vdc, void *from, void *to, int mode, int dir) 6925 { 6926 _NOTE(ARGUNUSED(vdc)) 6927 _NOTE(ARGUNUSED(from)) 6928 _NOTE(ARGUNUSED(to)) 6929 _NOTE(ARGUNUSED(mode)) 6930 _NOTE(ARGUNUSED(dir)) 6931 6932 return (0); 6933 } 6934 6935 static int 6936 vdc_get_wce_convert(vdc_t *vdc, void *from, void *to, 6937 int mode, int dir) 6938 { 6939 _NOTE(ARGUNUSED(vdc)) 6940 6941 if (dir == VD_COPYIN) 6942 return (0); /* nothing to do */ 6943 6944 if (ddi_copyout(from, to, sizeof (int), mode) != 0) 6945 return (EFAULT); 6946 6947 return (0); 6948 } 6949 6950 static int 6951 vdc_set_wce_convert(vdc_t *vdc, void *from, void *to, 6952 int mode, int dir) 6953 { 6954 _NOTE(ARGUNUSED(vdc)) 6955 6956 if (dir == VD_COPYOUT) 6957 return (0); /* nothing to do */ 6958 6959 if (ddi_copyin(from, to, sizeof (int), mode) != 0) 6960 return (EFAULT); 6961 6962 return (0); 6963 } 6964 6965 /* 6966 * Function: 6967 * vdc_get_vtoc_convert() 6968 * 6969 * Description: 6970 * This routine performs the necessary conversions from the format 6971 * defined in FWARC 2006/195 to the DKIOCGVTOC Solaris structure. 6972 * 6973 * In the struct vtoc definition, the timestamp field is marked as not 6974 * supported so it is not part of vDisk protocol (FWARC 2006/195). 6975 * However SVM uses that field to check that it can write into the VTOC, 6976 * so we fake up the contents of that field. 6977 * 6978 * Arguments: 6979 * vdc - the vDisk client 6980 * from - the buffer containing the data to be copied from 6981 * to - the buffer to be copied to 6982 * mode - flags passed to ioctl() call 6983 * dir - the "direction" of the copy - VD_COPYIN or VD_COPYOUT 6984 * 6985 * Return Code: 6986 * 0 - Success 6987 * ENXIO - incorrect buffer passed in. 6988 * EFAULT - ddi_copyout routine encountered an error.
6989 */ 6990 static int 6991 vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 6992 { 6993 int i; 6994 void *tmp_mem = NULL; 6995 void *tmp_memp; 6996 struct vtoc vt; 6997 struct vtoc32 vt32; 6998 int copy_len = 0; 6999 int rv = 0; 7000 7001 if (dir != VD_COPYOUT) 7002 return (0); /* nothing to do */ 7003 7004 if ((from == NULL) || (to == NULL)) 7005 return (ENXIO); 7006 7007 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) 7008 copy_len = sizeof (struct vtoc32); 7009 else 7010 copy_len = sizeof (struct vtoc); 7011 7012 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 7013 7014 VD_VTOC2VTOC((vd_vtoc_t *)from, &vt); 7015 7016 /* fake the VTOC timestamp field */ 7017 for (i = 0; i < V_NUMPAR; i++) { 7018 vt.timestamp[i] = vdc->vtoc->timestamp[i]; 7019 } 7020 7021 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 7022 /* LINTED E_ASSIGN_NARROW_CONV */ 7023 vtoctovtoc32(vt, vt32); 7024 tmp_memp = &vt32; 7025 } else { 7026 tmp_memp = &vt; 7027 } 7028 rv = ddi_copyout(tmp_memp, to, copy_len, mode); 7029 if (rv != 0) 7030 rv = EFAULT; 7031 7032 kmem_free(tmp_mem, copy_len); 7033 return (rv); 7034 } 7035 7036 /* 7037 * Function: 7038 * vdc_set_vtoc_convert() 7039 * 7040 * Description: 7041 * This routine performs the necessary conversions from the DKIOCSVTOC 7042 * Solaris structure to the format defined in FWARC 2006/195. 7043 * 7044 * Arguments: 7045 * vdc - the vDisk client 7046 * from - Buffer with data 7047 * to - Buffer where data is to be copied to 7048 * mode - flags passed to ioctl 7049 * dir - direction of copy (in or out) 7050 * 7051 * Return Code: 7052 * 0 - Success 7053 * ENXIO - Invalid buffer passed in 7054 * EFAULT - ddi_copyin of data failed 7055 */ 7056 static int 7057 vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 7058 { 7059 _NOTE(ARGUNUSED(vdc)) 7060 7061 void *tmp_mem = NULL, *uvtoc; 7062 struct vtoc vt; 7063 struct vtoc *vtp = &vt; 7064 vd_vtoc_t vtvd; 7065 int copy_len = 0; 7066 int i, rv = 0; 7067 7068 if ((from == NULL) || (to == NULL)) 7069 return (ENXIO); 7070 7071 if (dir == VD_COPYIN) 7072 uvtoc = from; 7073 else 7074 uvtoc = to; 7075 7076 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) 7077 copy_len = sizeof (struct vtoc32); 7078 else 7079 copy_len = sizeof (struct vtoc); 7080 7081 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 7082 7083 rv = ddi_copyin(uvtoc, tmp_mem, copy_len, mode); 7084 if (rv != 0) { 7085 kmem_free(tmp_mem, copy_len); 7086 return (EFAULT); 7087 } 7088 7089 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 7090 vtoc32tovtoc((*(struct vtoc32 *)tmp_mem), vt); 7091 } else { 7092 vtp = tmp_mem; 7093 } 7094 7095 if (dir == VD_COPYOUT) { 7096 /* 7097 * The disk label may have changed. Revalidate the disk 7098 * geometry. This will also update the device nodes and 7099 * properties. 7100 */ 7101 vdc_validate(vdc); 7102 7103 /* 7104 * We also need to keep track of the timestamp fields.
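 * The timestamp is not part of the vDisk protocol (see
 * vdc_get_vtoc_convert()), so we cache it in vdc->vtoc for use by
 * subsequent DKIOCGVTOC requests.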
7105 */ 7106 for (i = 0; i < V_NUMPAR; i++) { 7107 vdc->vtoc->timestamp[i] = vtp->timestamp[i]; 7108 } 7109 /* vtp may point into tmp_mem, so only free it after the copy */ kmem_free(tmp_mem, copy_len); 7110 return (0); 7111 } 7112 7113 VTOC2VD_VTOC(vtp, &vtvd); 7114 bcopy(&vtvd, to, sizeof (vd_vtoc_t)); 7115 kmem_free(tmp_mem, copy_len); 7116 7117 return (0); 7118 } 7119 7120 /* 7121 * Function: 7122 * vdc_get_geom_convert() 7123 * 7124 * Description: 7125 * This routine performs the necessary conversions from the format 7126 * defined in FWARC 2006/195 to the Solaris structure used by the 7127 * DKIOCGGEOM, DKIOCG_PHYGEOM and DKIOCG_VIRTGEOM ioctls. 7128 * 7129 * Arguments: 7130 * vdc - the vDisk client 7131 * from - Buffer with data 7132 * to - Buffer where data is to be copied to 7133 * mode - flags passed to ioctl 7134 * dir - direction of copy (in or out) 7135 * 7136 * Return Code: 7137 * 0 - Success 7138 * ENXIO - Invalid buffer passed in 7139 * EFAULT - ddi_copyout of data failed 7140 */ 7141 static int 7142 vdc_get_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 7143 { 7144 _NOTE(ARGUNUSED(vdc)) 7145 7146 struct dk_geom geom; 7147 int copy_len = sizeof (struct dk_geom); 7148 int rv = 0; 7149 7150 if (dir != VD_COPYOUT) 7151 return (0); /* nothing to do */ 7152 7153 if ((from == NULL) || (to == NULL)) 7154 return (ENXIO); 7155 7156 VD_GEOM2DK_GEOM((vd_geom_t *)from, &geom); 7157 rv = ddi_copyout(&geom, to, copy_len, mode); 7158 if (rv != 0) 7159 rv = EFAULT; 7160 7161 return (rv); 7162 } 7163 7164 /* 7165 * Function: 7166 * vdc_set_geom_convert() 7167 * 7168 * Description: 7169 * This routine performs the necessary conversions from the DKIOCSGEOM 7170 * Solaris structure to the format defined in FWARC 2006/195. 7171 * 7172 * Arguments: 7173 * vdc - the vDisk client 7174 * from - Buffer with data 7175 * to - Buffer where data is to be copied to 7176 * mode - flags passed to ioctl 7177 * dir - direction of copy (in or out) 7178 * 7179 * Return Code: 7180 * 0 - Success 7181 * ENXIO - Invalid buffer passed in 7182 * EFAULT - ddi_copyin of data failed 7183 */ 7184 static int 7185 vdc_set_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 7186 { 7187 _NOTE(ARGUNUSED(vdc)) 7188 7189 vd_geom_t vdgeom; 7190 void *tmp_mem = NULL; 7191 int copy_len = sizeof (struct dk_geom); 7192 int rv = 0; 7193 7194 if (dir != VD_COPYIN) 7195 return (0); /* nothing to do */ 7196 7197 if ((from == NULL) || (to == NULL)) 7198 return (ENXIO); 7199 7200 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 7201 7202 rv = ddi_copyin(from, tmp_mem, copy_len, mode); 7203 if (rv != 0) { 7204 kmem_free(tmp_mem, copy_len); 7205 return (EFAULT); 7206 } 7207 DK_GEOM2VD_GEOM((struct dk_geom *)tmp_mem, &vdgeom); 7208 bcopy(&vdgeom, to, sizeof (vdgeom)); 7209 kmem_free(tmp_mem, copy_len); 7210 7211 return (0); 7212 } 7213 7214 static int 7215 vdc_get_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 7216 { 7217 _NOTE(ARGUNUSED(vdc)) 7218 7219 vd_efi_t *vd_efi; 7220 dk_efi_t dk_efi; 7221 int rv = 0; 7222 void *uaddr; 7223 7224 if ((from == NULL) || (to == NULL)) 7225 return (ENXIO); 7226 7227 if (dir == VD_COPYIN) { 7228 7229 vd_efi = (vd_efi_t *)to; 7230 7231 rv = ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode); 7232 if (rv != 0) 7233 return (EFAULT); 7234 7235 vd_efi->lba = dk_efi.dki_lba; 7236 vd_efi->length = dk_efi.dki_length; 7237 bzero(vd_efi->data, vd_efi->length); 7238 7239 } else { 7240 7241 rv = ddi_copyin(to, &dk_efi, sizeof (dk_efi_t), mode); 7242 if (rv != 0) 7243 return (EFAULT); 7244 7245 uaddr = dk_efi.dki_data; 7246 7247 dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP);
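/*
 * dki_data now points to a kernel scratch buffer; the original
 * user-space data pointer was saved in uaddr and is used as the
 * destination of the copyout below.
 */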
7248 7249 VD_EFI2DK_EFI((vd_efi_t *)from, &dk_efi); 7250 7251 rv = ddi_copyout(dk_efi.dki_data, uaddr, dk_efi.dki_length, 7252 mode); 7253 /* free the kernel buffer whether or not the copyout succeeded */ 7254 kmem_free(dk_efi.dki_data, dk_efi.dki_length); 7255 if (rv != 0) 7256 return (EFAULT); 7257 } 7258 7259 return (0); 7260 } 7261 7262 static int 7263 vdc_set_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 7264 { 7265 _NOTE(ARGUNUSED(vdc)) 7266 7267 dk_efi_t dk_efi; 7268 void *uaddr; 7269 7270 if (dir == VD_COPYOUT) { 7271 /* 7272 * The disk label may have changed. Revalidate the disk 7273 * geometry. This will also update the device nodes and 7274 * properties. 7275 */ 7276 vdc_validate(vdc); 7277 return (0); 7278 } 7279 7280 if ((from == NULL) || (to == NULL)) 7281 return (ENXIO); 7282 7283 if (ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode) != 0) 7284 return (EFAULT); 7285 7286 uaddr = dk_efi.dki_data; 7287 7288 dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP); 7289 7290 if (ddi_copyin(uaddr, dk_efi.dki_data, dk_efi.dki_length, mode) != 0) { 7291 /* don't leak the kernel buffer on a failed copyin */ kmem_free(dk_efi.dki_data, dk_efi.dki_length); return (EFAULT); } 7292 7293 DK_EFI2VD_EFI(&dk_efi, (vd_efi_t *)to); 7294 7295 kmem_free(dk_efi.dki_data, dk_efi.dki_length); 7296 7297 return (0); 7298 } 7299 7300 7301 /* -------------------------------------------------------------------------- */ 7302 7303 /* 7304 * Function: 7305 * vdc_create_fake_geometry() 7306 * 7307 * Description: 7308 * This routine fakes up the disk info needed for some DKIO ioctls such 7309 * as DKIOCINFO and DKIOCGMEDIAINFO [just like lofi(7D) and ramdisk(7D) do] 7310 * 7311 * Note: This function must not be called until the vDisk attributes have 7312 * been exchanged as part of the handshake with the vDisk server. 7313 * 7314 * Arguments: 7315 * vdc - soft state pointer for this instance of the device driver. 7316 * 7317 * Return Code: 7318 * none. 7319 */ 7320 static void 7321 vdc_create_fake_geometry(vdc_t *vdc) 7322 { 7323 ASSERT(vdc != NULL); 7324 ASSERT(vdc->max_xfer_sz != 0); 7325 7326 /* 7327 * DKIOCINFO support 7328 */ 7329 if (vdc->cinfo == NULL) 7330 vdc->cinfo = kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP); 7331 7332 (void) strcpy(vdc->cinfo->dki_cname, VDC_DRIVER_NAME); 7333 (void) strcpy(vdc->cinfo->dki_dname, VDC_DRIVER_NAME); 7334 /* max_xfer_sz is #blocks so we don't need to divide by DEV_BSIZE */ 7335 vdc->cinfo->dki_maxtransfer = vdc->max_xfer_sz; 7336 7337 /* 7338 * We set the controller type to DKC_SCSI_CCS only if the VD_OP_SCSICMD 7339 * operation is supported, otherwise the controller type is DKC_DIRECT. 7340 * Version 1.0 does not support the VD_OP_SCSICMD operation, so the 7341 * controller type is always DKC_DIRECT in that case.
7342 * 7343 * If the virtual disk is backed by a physical CD/DVD device or 7344 * an ISO image, modify the controller type to indicate this 7345 */ 7346 switch (vdc->vdisk_media) { 7347 case VD_MEDIA_CD: 7348 case VD_MEDIA_DVD: 7349 vdc->cinfo->dki_ctype = DKC_CDROM; 7350 break; 7351 case VD_MEDIA_FIXED: 7352 if (VD_OP_SUPPORTED(vdc->operations, VD_OP_SCSICMD)) 7353 vdc->cinfo->dki_ctype = DKC_SCSI_CCS; 7354 else 7355 vdc->cinfo->dki_ctype = DKC_DIRECT; 7356 break; 7357 default: 7358 /* in the case of v1.0 we default to a fixed disk */ 7359 vdc->cinfo->dki_ctype = DKC_DIRECT; 7360 break; 7361 } 7362 vdc->cinfo->dki_flags = DKI_FMTVOL; 7363 vdc->cinfo->dki_cnum = 0; 7364 vdc->cinfo->dki_addr = 0; 7365 vdc->cinfo->dki_space = 0; 7366 vdc->cinfo->dki_prio = 0; 7367 vdc->cinfo->dki_vec = 0; 7368 vdc->cinfo->dki_unit = vdc->instance; 7369 vdc->cinfo->dki_slave = 0; 7370 /* 7371 * The partition number will be created on the fly depending on the 7372 * actual slice (i.e. minor node) that is used to request the data. 7373 */ 7374 vdc->cinfo->dki_partition = 0; 7375 7376 /* 7377 * DKIOCGMEDIAINFO support 7378 */ 7379 if (vdc->minfo == NULL) 7380 vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP); 7381 7382 if (vio_ver_is_supported(vdc->ver, 1, 1)) { 7383 vdc->minfo->dki_media_type = 7384 VD_MEDIATYPE2DK_MEDIATYPE(vdc->vdisk_media); 7385 } else { 7386 vdc->minfo->dki_media_type = DK_FIXED_DISK; 7387 } 7388 7389 vdc->minfo->dki_capacity = vdc->vdisk_size; 7390 vdc->minfo->dki_lbsize = vdc->block_size; 7391 } 7392 7393 static ushort_t 7394 vdc_lbl2cksum(struct dk_label *label) 7395 { 7396 int count; 7397 ushort_t sum, *sp; 7398 7399 count = (sizeof (struct dk_label)) / (sizeof (short)) - 1; 7400 sp = (ushort_t *)label; 7401 sum = 0; 7402 while (count--) { 7403 sum ^= *sp++; 7404 } 7405 7406 return (sum); 7407 } 7408 7409 /* 7410 * Function: 7411 * vdc_validate_geometry 7412 * 7413 * Description: 7414 * This routine discovers the label and geometry of the disk. It stores 7415 * the disk label and related information in the vdc structure. If it 7416 * fails to validate the geometry or to discover the disk label then 7417 * the label is marked as unknown (VD_DISK_LABEL_UNK). 7418 * 7419 * Arguments: 7420 * vdc - soft state pointer for this instance of the device driver. 7421 * 7422 * Return Code: 7423 * 0 - success. 7424 * EINVAL - unknown disk label. 7425 * ENOTSUP - geometry not applicable (EFI label). 7426 * EIO - error accessing the disk. 7427 */ 7428 static int 7429 vdc_validate_geometry(vdc_t *vdc) 7430 { 7431 buf_t *buf; /* BREAD requests need to be in a buf_t structure */ 7432 dev_t dev; 7433 int rv, rval; 7434 struct dk_label label; 7435 struct dk_geom geom; 7436 struct vtoc vtoc; 7437 efi_gpt_t *gpt; 7438 efi_gpe_t *gpe; 7439 vd_efi_dev_t edev; 7440 7441 ASSERT(vdc != NULL); 7442 ASSERT(vdc->vtoc != NULL && vdc->geom != NULL); 7443 ASSERT(MUTEX_HELD(&vdc->lock)); 7444 7445 mutex_exit(&vdc->lock); 7446 7447 dev = makedevice(ddi_driver_major(vdc->dip), 7448 VD_MAKE_DEV(vdc->instance, 0)); 7449 7450 rv = vd_process_ioctl(dev, DKIOCGGEOM, (caddr_t)&geom, FKIOCTL, &rval); 7451 if (rv == 0) 7452 rv = vd_process_ioctl(dev, DKIOCGVTOC, (caddr_t)&vtoc, 7453 FKIOCTL, &rval); 7454 7455 if (rv == ENOTSUP) { 7456 /* 7457 * If the device does not support VTOC then we try 7458 * to read an EFI label. 7459 * 7460 * We need to know the block size and the disk size to 7461 * be able to read an EFI label. 
7462 */ 7463 if (vdc->vdisk_size == 0) { 7464 if ((rv = vdc_check_capacity(vdc)) != 0) { 7465 mutex_enter(&vdc->lock); 7466 vdc_store_label_unk(vdc); 7467 return (rv); 7468 } 7469 } 7470 7471 VD_EFI_DEV_SET(edev, vdc, vd_process_efi_ioctl); 7472 7473 rv = vd_efi_alloc_and_read(&edev, &gpt, &gpe); 7474 7475 if (rv) { 7476 DMSG(vdc, 0, "[%d] Failed to get EFI (err=%d)", 7477 vdc->instance, rv); 7478 mutex_enter(&vdc->lock); 7479 vdc_store_label_unk(vdc); 7480 return (EIO); 7481 } 7482 7483 mutex_enter(&vdc->lock); 7484 vdc_store_label_efi(vdc, gpt, gpe); 7485 vd_efi_free(&edev, gpt, gpe); 7486 return (ENOTSUP); 7487 } 7488 7489 if (rv != 0) { 7490 DMSG(vdc, 0, "[%d] Failed to get VTOC (err=%d)", 7491 vdc->instance, rv); 7492 mutex_enter(&vdc->lock); 7493 vdc_store_label_unk(vdc); 7494 if (rv != EINVAL) 7495 rv = EIO; 7496 return (rv); 7497 } 7498 7499 /* check that geometry and vtoc are valid */ 7500 if (geom.dkg_nhead == 0 || geom.dkg_nsect == 0 || 7501 vtoc.v_sanity != VTOC_SANE) { 7502 mutex_enter(&vdc->lock); 7503 vdc_store_label_unk(vdc); 7504 return (EINVAL); 7505 } 7506 7507 /* 7508 * We have a disk and a valid VTOC. However this does not mean 7509 * that the disk currently has a VTOC label. The returned VTOC may 7510 * be a default VTOC to be used for configuring the disk (this is 7511 * what is done for disk images). So we read the label from the 7512 * beginning of the disk to ensure we really have a VTOC label. 7513 * 7514 * FUTURE: This could be the default way for reading the VTOC 7515 * from the disk as opposed to sending the VD_OP_GET_VTOC 7516 * to the server. This will be the default if vdc is implemented 7517 * on top of cmlb. 7518 */ 7519 7520 /* 7521 * A single-slice disk does not support reads using an absolute disk 7522 * offset, so we just rely on the DKIOCGVTOC ioctl in that case. 7523 */ 7524 if (vdc->vdisk_type == VD_DISK_TYPE_SLICE) { 7525 mutex_enter(&vdc->lock); 7526 if (vtoc.v_nparts != 1) { 7527 vdc_store_label_unk(vdc); 7528 return (EINVAL); 7529 } 7530 vdc_store_label_vtoc(vdc, &geom, &vtoc); 7531 return (0); 7532 } 7533 7534 if (vtoc.v_nparts != V_NUMPAR) { 7535 mutex_enter(&vdc->lock); 7536 vdc_store_label_unk(vdc); 7537 return (EINVAL); 7538 } 7539 7540 /* 7541 * Read disk label from start of disk 7542 */ 7543 buf = kmem_alloc(sizeof (buf_t), KM_SLEEP); 7544 bioinit(buf); 7545 buf->b_un.b_addr = (caddr_t)&label; 7546 buf->b_bcount = DK_LABEL_SIZE; 7547 buf->b_flags = B_BUSY | B_READ; 7548 buf->b_dev = cmpdev(dev); 7549 rv = vdc_send_request(vdc, VD_OP_BREAD, (caddr_t)&label, 7550 DK_LABEL_SIZE, VD_SLICE_NONE, 0, CB_STRATEGY, buf, VIO_read_dir); 7551 if (rv) { 7552 DMSG(vdc, 1, "[%d] Failed to read disk block 0\n", 7553 vdc->instance); 7554 } else { 7555 rv = biowait(buf); 7556 biofini(buf); 7557 } 7558 kmem_free(buf, sizeof (buf_t)); 7559 7560 if (rv != 0 || label.dkl_magic != DKL_MAGIC || 7561 label.dkl_cksum != vdc_lbl2cksum(&label)) { 7562 DMSG(vdc, 1, "[%d] Got VTOC with invalid label\n", 7563 vdc->instance); 7564 mutex_enter(&vdc->lock); 7565 vdc_store_label_unk(vdc); 7566 return (EINVAL); 7567 } 7568 7569 mutex_enter(&vdc->lock); 7570 vdc_store_label_vtoc(vdc, &geom, &vtoc); 7571 return (0); 7572 } 7573 7574 /* 7575 * Function: 7576 * vdc_validate 7577 * 7578 * Description: 7579 * This routine discovers the label of the disk and creates the 7580 * appropriate device nodes if the label has changed. 7581 * 7582 * Arguments: 7583 * vdc - soft state pointer for this instance of the device driver. 7584 * 7585 * Return Code: 7586 * none.
7587 */ 7588 static void 7589 vdc_validate(vdc_t *vdc) 7590 { 7591 vd_disk_label_t old_label; 7592 vd_slice_t old_slice[V_NUMPAR]; 7593 int rv; 7594 7595 ASSERT(!MUTEX_HELD(&vdc->lock)); 7596 7597 mutex_enter(&vdc->lock); 7598 7599 /* save the current label and vtoc */ 7600 old_label = vdc->vdisk_label; 7601 bcopy(vdc->slice, &old_slice, sizeof (vd_slice_t) * V_NUMPAR); 7602 7603 /* check the geometry */ 7604 (void) vdc_validate_geometry(vdc); 7605 7606 /* if the disk label has changed, update device nodes */ 7607 if (vdc->vdisk_label != old_label) { 7608 7609 if (vdc->vdisk_label == VD_DISK_LABEL_EFI) 7610 rv = vdc_create_device_nodes_efi(vdc); 7611 else 7612 rv = vdc_create_device_nodes_vtoc(vdc); 7613 7614 if (rv != 0) { 7615 DMSG(vdc, 0, "![%d] Failed to update device nodes", 7616 vdc->instance); 7617 } 7618 } 7619 7620 /* if the vtoc has changed, update device nodes properties */ 7621 if (bcmp(vdc->slice, &old_slice, sizeof (vd_slice_t) * V_NUMPAR) != 0) { 7622 7623 if (vdc_create_device_nodes_props(vdc) != 0) { 7624 DMSG(vdc, 0, "![%d] Failed to update device nodes" 7625 " properties", vdc->instance); 7626 } 7627 } 7628 7629 mutex_exit(&vdc->lock); 7630 } 7631 7632 static void 7633 vdc_validate_task(void *arg) 7634 { 7635 vdc_t *vdc = (vdc_t *)arg; 7636 7637 vdc_validate(vdc); 7638 7639 mutex_enter(&vdc->lock); 7640 ASSERT(vdc->validate_pending > 0); 7641 vdc->validate_pending--; 7642 mutex_exit(&vdc->lock); 7643 } 7644 7645 /* 7646 * Function: 7647 * vdc_setup_devid() 7648 * 7649 * Description: 7650 * This routine discovers the devid of a vDisk. It requests the devid of 7651 * the underlying device from the vDisk server, builds an encapsulated 7652 * devid based on the retrieved devid and registers that new devid with 7653 * the vDisk. 7654 * 7655 * Arguments: 7656 * vdc - soft state pointer for this instance of the device driver. 7657 * 7658 * Return Code: 7659 * 0 - A devid was successfully registered for the vDisk 7660 */ 7661 static int 7662 vdc_setup_devid(vdc_t *vdc) 7663 { 7664 int rv; 7665 vd_devid_t *vd_devid; 7666 size_t bufsize, bufid_len; 7667 7668 /* 7669 * We don't know in advance the size of the devid that the server 7670 * will return, but this size will be encoded into the reply. So 7671 * we do a first request using a default size, then we check if this 7672 * size was large enough. If not then we do a second request with 7673 * the correct size returned by the server. Note that ldc requires 7674 * size to be 8-byte aligned. 7675 */ 7676 bufsize = P2ROUNDUP(VD_DEVID_SIZE(VD_DEVID_DEFAULT_LEN), 7677 sizeof (uint64_t)); 7678 vd_devid = kmem_zalloc(bufsize, KM_SLEEP); 7679 bufid_len = bufsize - sizeof (vd_devid_t) - 1; 7680 7681 rv = vdc_do_sync_op(vdc, VD_OP_GET_DEVID, (caddr_t)vd_devid, 7682 bufsize, 0, 0, CB_SYNC, 0, VIO_both_dir, B_TRUE); 7683 7684 DMSG(vdc, 2, "sync_op returned %d\n", rv); 7685 7686 if (rv) { 7687 kmem_free(vd_devid, bufsize); 7688 return (rv); 7689 } 7690 7691 if (vd_devid->length > bufid_len) { 7692 /* 7693 * The returned devid is larger than the buffer used. Try again 7694 * with a buffer of the right size.
7695 */ 7696 /* compute the new size before freeing the current reply buffer */ size_t new_bufsize = P2ROUNDUP(VD_DEVID_SIZE(vd_devid->length), 7697 sizeof (uint64_t)); 7698 kmem_free(vd_devid, bufsize); bufsize = new_bufsize; 7699 vd_devid = kmem_zalloc(bufsize, KM_SLEEP); 7700 bufid_len = bufsize - sizeof (vd_devid_t) - 1; 7701 7702 rv = vdc_do_sync_op(vdc, VD_OP_GET_DEVID, 7703 (caddr_t)vd_devid, bufsize, 0, 0, CB_SYNC, 0, 7704 VIO_both_dir, B_TRUE); 7705 7706 if (rv) { 7707 kmem_free(vd_devid, bufsize); 7708 return (rv); 7709 } 7710 } 7711 7712 /* 7713 * The virtual disk should have the same device id as the one associated 7714 * with the physical disk it is mapped on, otherwise sharing a disk 7715 * between an LDom and a non-LDom may not work (for example for a shared 7716 * SVM disk set). 7717 * 7718 * The DDI framework does not allow creating a device id with an 7719 * arbitrary type so we first create a device id of type DEVID_ENCAP 7720 * and then we restore the original type of the physical device. 7721 */ 7722 7723 DMSG(vdc, 2, ": devid length = %d\n", vd_devid->length); 7724 7725 /* build an encapsulated devid based on the returned devid */ 7726 if (ddi_devid_init(vdc->dip, DEVID_ENCAP, vd_devid->length, 7727 vd_devid->id, &vdc->devid) != DDI_SUCCESS) { 7728 DMSG(vdc, 1, "[%d] Failed to create devid\n", vdc->instance); 7729 kmem_free(vd_devid, bufsize); 7730 return (1); 7731 } 7732 7733 DEVID_FORMTYPE((impl_devid_t *)vdc->devid, vd_devid->type); 7734 7735 ASSERT(ddi_devid_valid(vdc->devid) == DDI_SUCCESS); 7736 7737 kmem_free(vd_devid, bufsize); 7738 7739 if (ddi_devid_register(vdc->dip, vdc->devid) != DDI_SUCCESS) { 7740 DMSG(vdc, 1, "[%d] Failed to register devid\n", vdc->instance); 7741 return (1); 7742 } 7743 7744 return (0); 7745 } 7746 7747 static void 7748 vdc_store_label_efi(vdc_t *vdc, efi_gpt_t *gpt, efi_gpe_t *gpe) 7749 { 7750 int i, nparts; 7751 7752 ASSERT(MUTEX_HELD(&vdc->lock)); 7753 7754 vdc->vdisk_label = VD_DISK_LABEL_EFI; 7755 bzero(vdc->vtoc, sizeof (struct vtoc)); 7756 bzero(vdc->geom, sizeof (struct dk_geom)); 7757 bzero(vdc->slice, sizeof (vd_slice_t) * V_NUMPAR); 7758 7759 nparts = gpt->efi_gpt_NumberOfPartitionEntries; 7760 7761 for (i = 0; i < nparts && i < VD_EFI_WD_SLICE; i++) { 7762 7763 if (gpe[i].efi_gpe_StartingLBA == 0 || 7764 gpe[i].efi_gpe_EndingLBA == 0) { 7765 continue; 7766 } 7767 7768 vdc->slice[i].start = gpe[i].efi_gpe_StartingLBA; 7769 vdc->slice[i].nblocks = gpe[i].efi_gpe_EndingLBA - 7770 gpe[i].efi_gpe_StartingLBA + 1; 7771 } 7772 7773 ASSERT(vdc->vdisk_size != 0); 7774 vdc->slice[VD_EFI_WD_SLICE].start = 0; 7775 vdc->slice[VD_EFI_WD_SLICE].nblocks = vdc->vdisk_size; 7776 7777 } 7778 7779 static void 7780 vdc_store_label_vtoc(vdc_t *vdc, struct dk_geom *geom, struct vtoc *vtoc) 7781 { 7782 int i; 7783 7784 ASSERT(MUTEX_HELD(&vdc->lock)); 7785 ASSERT(vdc->block_size == vtoc->v_sectorsz); 7786 7787 vdc->vdisk_label = VD_DISK_LABEL_VTOC; 7788 bcopy(vtoc, vdc->vtoc, sizeof (struct vtoc)); 7789 bcopy(geom, vdc->geom, sizeof (struct dk_geom)); 7790 bzero(vdc->slice, sizeof (vd_slice_t) * V_NUMPAR); 7791 7792 for (i = 0; i < vtoc->v_nparts; i++) { 7793 vdc->slice[i].start = vtoc->v_part[i].p_start; 7794 vdc->slice[i].nblocks = vtoc->v_part[i].p_size; 7795 } 7796 } 7797 7798 static void 7799 vdc_store_label_unk(vdc_t *vdc) 7800 { 7801 ASSERT(MUTEX_HELD(&vdc->lock)); 7802 7803 vdc->vdisk_label = VD_DISK_LABEL_UNK; 7804 bzero(vdc->vtoc, sizeof (struct vtoc)); 7805 bzero(vdc->geom, sizeof (struct dk_geom)); 7806 bzero(vdc->slice, sizeof (vd_slice_t) * V_NUMPAR); 7807 } 7808