1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * LDoms virtual disk client (vdc) device driver 31 * 32 * This driver runs on a guest logical domain and communicates with the virtual 33 * disk server (vds) driver running on the service domain which is exporting 34 * virtualized "disks" to the guest logical domain. 35 * 36 * The driver can be divided into four sections: 37 * 38 * 1) generic device driver housekeeping 39 * _init, _fini, attach, detach, ops structures, etc. 40 * 41 * 2) communication channel setup 42 * Setup the communications link over the LDC channel that vdc uses to 43 * talk to the vDisk server. Initialise the descriptor ring which 44 * allows the LDC clients to transfer data via memory mappings. 45 * 46 * 3) Support exported to upper layers (filesystems, etc) 47 * The upper layers call into vdc via strategy(9E) and DKIO(7I) 48 * ioctl calls. 
vdc will copy the data to be written to the descriptor 49 * ring or maps the buffer to store the data read by the vDisk 50 * server into the descriptor ring. It then sends a message to the 51 * vDisk server requesting it to complete the operation. 52 * 53 * 4) Handling responses from vDisk server. 54 * The vDisk server will ACK some or all of the messages vdc sends to it 55 * (this is configured during the handshake). Upon receipt of an ACK 56 * vdc will check the descriptor ring and signal to the upper layer 57 * code waiting on the IO. 58 */ 59 60 #include <sys/atomic.h> 61 #include <sys/conf.h> 62 #include <sys/disp.h> 63 #include <sys/ddi.h> 64 #include <sys/dkio.h> 65 #include <sys/efi_partition.h> 66 #include <sys/fcntl.h> 67 #include <sys/file.h> 68 #include <sys/mach_descrip.h> 69 #include <sys/modctl.h> 70 #include <sys/mdeg.h> 71 #include <sys/note.h> 72 #include <sys/open.h> 73 #include <sys/sdt.h> 74 #include <sys/stat.h> 75 #include <sys/sunddi.h> 76 #include <sys/types.h> 77 #include <sys/promif.h> 78 #include <sys/vtoc.h> 79 #include <sys/archsystm.h> 80 #include <sys/sysmacros.h> 81 82 #include <sys/cdio.h> 83 #include <sys/dktp/fdisk.h> 84 #include <sys/scsi/generic/sense.h> 85 #include <sys/scsi/impl/uscsi.h> /* Needed for defn of USCSICMD ioctl */ 86 87 #include <sys/ldoms.h> 88 #include <sys/ldc.h> 89 #include <sys/vio_common.h> 90 #include <sys/vio_mailbox.h> 91 #include <sys/vdsk_common.h> 92 #include <sys/vdsk_mailbox.h> 93 #include <sys/vdc.h> 94 95 /* 96 * function prototypes 97 */ 98 99 /* standard driver functions */ 100 static int vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred); 101 static int vdc_close(dev_t dev, int flag, int otyp, cred_t *cred); 102 static int vdc_strategy(struct buf *buf); 103 static int vdc_print(dev_t dev, char *str); 104 static int vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk); 105 static int vdc_read(dev_t dev, struct uio *uio, cred_t *cred); 106 static int vdc_write(dev_t dev, struct uio 
*uio, cred_t *cred); 107 static int vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, 108 cred_t *credp, int *rvalp); 109 static int vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred); 110 static int vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred); 111 112 static int vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, 113 void *arg, void **resultp); 114 static int vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd); 115 static int vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd); 116 117 /* setup */ 118 static void vdc_min(struct buf *bufp); 119 static int vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen); 120 static int vdc_do_ldc_init(vdc_t *vdc); 121 static int vdc_start_ldc_connection(vdc_t *vdc); 122 static int vdc_create_device_nodes(vdc_t *vdc); 123 static int vdc_create_device_nodes_efi(vdc_t *vdc); 124 static int vdc_create_device_nodes_vtoc(vdc_t *vdc); 125 static int vdc_create_device_nodes_props(vdc_t *vdc); 126 static int vdc_get_ldc_id(dev_info_t *dip, uint64_t *ldc_id); 127 static int vdc_do_ldc_up(vdc_t *vdc); 128 static void vdc_terminate_ldc(vdc_t *vdc); 129 static int vdc_init_descriptor_ring(vdc_t *vdc); 130 static void vdc_destroy_descriptor_ring(vdc_t *vdc); 131 static int vdc_setup_devid(vdc_t *vdc); 132 static void vdc_store_efi(vdc_t *vdc, struct dk_gpt *efi); 133 134 /* handshake with vds */ 135 static int vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver); 136 static int vdc_ver_negotiation(vdc_t *vdcp); 137 static int vdc_init_attr_negotiation(vdc_t *vdc); 138 static int vdc_attr_negotiation(vdc_t *vdcp); 139 static int vdc_init_dring_negotiate(vdc_t *vdc); 140 static int vdc_dring_negotiation(vdc_t *vdcp); 141 static int vdc_send_rdx(vdc_t *vdcp); 142 static int vdc_rdx_exchange(vdc_t *vdcp); 143 static boolean_t vdc_is_supported_version(vio_ver_msg_t *ver_msg); 144 145 /* processing incoming messages from vDisk server */ 146 static void vdc_process_msg_thread(vdc_t *vdc); 147 static int vdc_recv(vdc_t *vdc, vio_msg_t 
*msgp, size_t *nbytesp); 148 149 static uint_t vdc_handle_cb(uint64_t event, caddr_t arg); 150 static int vdc_process_data_msg(vdc_t *vdc, vio_msg_t *msg); 151 static int vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg); 152 static int vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg); 153 static int vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg); 154 static int vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *msg); 155 static int vdc_send_request(vdc_t *vdcp, int operation, 156 caddr_t addr, size_t nbytes, int slice, diskaddr_t offset, 157 int cb_type, void *cb_arg, vio_desc_direction_t dir); 158 static int vdc_map_to_shared_dring(vdc_t *vdcp, int idx); 159 static int vdc_populate_descriptor(vdc_t *vdcp, int operation, 160 caddr_t addr, size_t nbytes, int slice, diskaddr_t offset, 161 int cb_type, void *cb_arg, vio_desc_direction_t dir); 162 static int vdc_do_sync_op(vdc_t *vdcp, int operation, 163 caddr_t addr, size_t nbytes, int slice, diskaddr_t offset, 164 int cb_type, void *cb_arg, vio_desc_direction_t dir); 165 166 static int vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp); 167 static int vdc_drain_response(vdc_t *vdcp); 168 static int vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx); 169 static int vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep); 170 static int vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg); 171 172 /* dkio */ 173 static int vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode); 174 static int vdc_create_fake_geometry(vdc_t *vdc); 175 static int vdc_setup_disk_layout(vdc_t *vdc); 176 static int vdc_null_copy_func(vdc_t *vdc, void *from, void *to, 177 int mode, int dir); 178 static int vdc_get_wce_convert(vdc_t *vdc, void *from, void *to, 179 int mode, int dir); 180 static int vdc_set_wce_convert(vdc_t *vdc, void *from, void *to, 181 int mode, int dir); 182 static int vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, 183 int mode, int dir); 184 static int vdc_set_vtoc_convert(vdc_t 
*vdc, void *from, void *to, 185 int mode, int dir); 186 static int vdc_get_geom_convert(vdc_t *vdc, void *from, void *to, 187 int mode, int dir); 188 static int vdc_set_geom_convert(vdc_t *vdc, void *from, void *to, 189 int mode, int dir); 190 static int vdc_uscsicmd_convert(vdc_t *vdc, void *from, void *to, 191 int mode, int dir); 192 static int vdc_get_efi_convert(vdc_t *vdc, void *from, void *to, 193 int mode, int dir); 194 static int vdc_set_efi_convert(vdc_t *vdc, void *from, void *to, 195 int mode, int dir); 196 197 /* 198 * Module variables 199 */ 200 201 /* 202 * Tunable variables to control how long vdc waits before timing out on 203 * various operations 204 */ 205 static int vdc_retries = 10; 206 207 /* calculated from 'vdc_usec_timeout' during attach */ 208 static uint64_t vdc_hz_timeout; /* units: Hz */ 209 static uint64_t vdc_usec_timeout = 30 * MICROSEC; /* 30s units: ns */ 210 211 static uint64_t vdc_hz_min_ldc_delay; 212 static uint64_t vdc_min_timeout_ldc = 1 * MILLISEC; 213 static uint64_t vdc_hz_max_ldc_delay; 214 static uint64_t vdc_max_timeout_ldc = 100 * MILLISEC; 215 216 static uint64_t vdc_ldc_read_init_delay = 1 * MILLISEC; 217 static uint64_t vdc_ldc_read_max_delay = 100 * MILLISEC; 218 219 /* values for dumping - need to run in a tighter loop */ 220 static uint64_t vdc_usec_timeout_dump = 100 * MILLISEC; /* 0.1s units: ns */ 221 static int vdc_dump_retries = 100; 222 223 /* Count of the number of vdc instances attached */ 224 static volatile uint32_t vdc_instance_count = 0; 225 226 /* Soft state pointer */ 227 static void *vdc_state; 228 229 /* 230 * Controlling the verbosity of the error/debug messages 231 * 232 * vdc_msglevel - controls level of messages 233 * vdc_matchinst - 64-bit variable where each bit corresponds 234 * to the vdc instance the vdc_msglevel applies. 235 */ 236 int vdc_msglevel = 0x0; 237 uint64_t vdc_matchinst = 0ull; 238 239 /* 240 * Supported vDisk protocol version pairs. 
241 * 242 * The first array entry is the latest and preferred version. 243 */ 244 static const vio_ver_t vdc_version[] = {{1, 0}}; 245 246 static struct cb_ops vdc_cb_ops = { 247 vdc_open, /* cb_open */ 248 vdc_close, /* cb_close */ 249 vdc_strategy, /* cb_strategy */ 250 vdc_print, /* cb_print */ 251 vdc_dump, /* cb_dump */ 252 vdc_read, /* cb_read */ 253 vdc_write, /* cb_write */ 254 vdc_ioctl, /* cb_ioctl */ 255 nodev, /* cb_devmap */ 256 nodev, /* cb_mmap */ 257 nodev, /* cb_segmap */ 258 nochpoll, /* cb_chpoll */ 259 ddi_prop_op, /* cb_prop_op */ 260 NULL, /* cb_str */ 261 D_MP | D_64BIT, /* cb_flag */ 262 CB_REV, /* cb_rev */ 263 vdc_aread, /* cb_aread */ 264 vdc_awrite /* cb_awrite */ 265 }; 266 267 static struct dev_ops vdc_ops = { 268 DEVO_REV, /* devo_rev */ 269 0, /* devo_refcnt */ 270 vdc_getinfo, /* devo_getinfo */ 271 nulldev, /* devo_identify */ 272 nulldev, /* devo_probe */ 273 vdc_attach, /* devo_attach */ 274 vdc_detach, /* devo_detach */ 275 nodev, /* devo_reset */ 276 &vdc_cb_ops, /* devo_cb_ops */ 277 NULL, /* devo_bus_ops */ 278 nulldev /* devo_power */ 279 }; 280 281 static struct modldrv modldrv = { 282 &mod_driverops, 283 "virtual disk client %I%", 284 &vdc_ops, 285 }; 286 287 static struct modlinkage modlinkage = { 288 MODREV_1, 289 &modldrv, 290 NULL 291 }; 292 293 /* -------------------------------------------------------------------------- */ 294 295 /* 296 * Device Driver housekeeping and setup 297 */ 298 299 int 300 _init(void) 301 { 302 int status; 303 304 if ((status = ddi_soft_state_init(&vdc_state, sizeof (vdc_t), 1)) != 0) 305 return (status); 306 if ((status = mod_install(&modlinkage)) != 0) 307 ddi_soft_state_fini(&vdc_state); 308 vdc_efi_init(vd_process_ioctl); 309 return (status); 310 } 311 312 int 313 _info(struct modinfo *modinfop) 314 { 315 return (mod_info(&modlinkage, modinfop)); 316 } 317 318 int 319 _fini(void) 320 { 321 int status; 322 323 if ((status = mod_remove(&modlinkage)) != 0) 324 return (status); 325 
vdc_efi_fini(); 326 ddi_soft_state_fini(&vdc_state); 327 return (0); 328 } 329 330 static int 331 vdc_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp) 332 { 333 _NOTE(ARGUNUSED(dip)) 334 335 int instance = VDCUNIT((dev_t)arg); 336 vdc_t *vdc = NULL; 337 338 switch (cmd) { 339 case DDI_INFO_DEVT2DEVINFO: 340 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 341 *resultp = NULL; 342 return (DDI_FAILURE); 343 } 344 *resultp = vdc->dip; 345 return (DDI_SUCCESS); 346 case DDI_INFO_DEVT2INSTANCE: 347 *resultp = (void *)(uintptr_t)instance; 348 return (DDI_SUCCESS); 349 default: 350 *resultp = NULL; 351 return (DDI_FAILURE); 352 } 353 } 354 355 static int 356 vdc_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) 357 { 358 int instance; 359 int rv; 360 vdc_t *vdc = NULL; 361 362 switch (cmd) { 363 case DDI_DETACH: 364 /* the real work happens below */ 365 break; 366 case DDI_SUSPEND: 367 /* nothing to do for this non-device */ 368 return (DDI_SUCCESS); 369 default: 370 return (DDI_FAILURE); 371 } 372 373 ASSERT(cmd == DDI_DETACH); 374 instance = ddi_get_instance(dip); 375 DMSGX(1, "[%d] Entered\n", instance); 376 377 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 378 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 379 return (DDI_FAILURE); 380 } 381 382 if (vdc->open_count) { 383 DMSG(vdc, 0, "[%d] Cannot detach: device is open", instance); 384 return (DDI_FAILURE); 385 } 386 387 DMSG(vdc, 0, "[%d] proceeding...\n", instance); 388 389 /* mark instance as detaching */ 390 vdc->lifecycle = VDC_LC_DETACHING; 391 392 /* 393 * try and disable callbacks to prevent another handshake 394 */ 395 rv = ldc_set_cb_mode(vdc->ldc_handle, LDC_CB_DISABLE); 396 DMSG(vdc, 0, "callback disabled (rv=%d)\n", rv); 397 398 if (vdc->initialized & VDC_THREAD) { 399 mutex_enter(&vdc->read_lock); 400 if ((vdc->read_state == VDC_READ_WAITING) || 401 (vdc->read_state == VDC_READ_RESET)) { 402 vdc->read_state = VDC_READ_RESET; 403 
cv_signal(&vdc->read_cv); 404 } 405 406 mutex_exit(&vdc->read_lock); 407 408 /* wake up any thread waiting for connection to come online */ 409 mutex_enter(&vdc->lock); 410 if (vdc->state == VDC_STATE_INIT_WAITING) { 411 DMSG(vdc, 0, 412 "[%d] write reset - move to resetting state...\n", 413 instance); 414 vdc->state = VDC_STATE_RESETTING; 415 cv_signal(&vdc->initwait_cv); 416 } 417 mutex_exit(&vdc->lock); 418 419 /* now wait until state transitions to VDC_STATE_DETACH */ 420 thread_join(vdc->msg_proc_thr->t_did); 421 ASSERT(vdc->state == VDC_STATE_DETACH); 422 DMSG(vdc, 0, "[%d] Reset thread exit and join ..\n", 423 vdc->instance); 424 } 425 426 mutex_enter(&vdc->lock); 427 428 if (vdc->initialized & VDC_DRING) 429 vdc_destroy_descriptor_ring(vdc); 430 431 if (vdc->initialized & VDC_LDC) 432 vdc_terminate_ldc(vdc); 433 434 mutex_exit(&vdc->lock); 435 436 if (vdc->initialized & VDC_MINOR) { 437 ddi_prop_remove_all(dip); 438 ddi_remove_minor_node(dip, NULL); 439 } 440 441 if (vdc->initialized & VDC_LOCKS) { 442 mutex_destroy(&vdc->lock); 443 mutex_destroy(&vdc->read_lock); 444 cv_destroy(&vdc->initwait_cv); 445 cv_destroy(&vdc->dring_free_cv); 446 cv_destroy(&vdc->membind_cv); 447 cv_destroy(&vdc->sync_pending_cv); 448 cv_destroy(&vdc->sync_blocked_cv); 449 cv_destroy(&vdc->read_cv); 450 cv_destroy(&vdc->running_cv); 451 } 452 453 if (vdc->minfo) 454 kmem_free(vdc->minfo, sizeof (struct dk_minfo)); 455 456 if (vdc->cinfo) 457 kmem_free(vdc->cinfo, sizeof (struct dk_cinfo)); 458 459 if (vdc->vtoc) 460 kmem_free(vdc->vtoc, sizeof (struct vtoc)); 461 462 if (vdc->label) 463 kmem_free(vdc->label, DK_LABEL_SIZE); 464 465 if (vdc->devid) { 466 ddi_devid_unregister(dip); 467 ddi_devid_free(vdc->devid); 468 } 469 470 if (vdc->initialized & VDC_SOFT_STATE) 471 ddi_soft_state_free(vdc_state, instance); 472 473 DMSG(vdc, 0, "[%d] End %p\n", instance, (void *)vdc); 474 475 return (DDI_SUCCESS); 476 } 477 478 479 static int 480 vdc_do_attach(dev_info_t *dip) 481 { 482 int 
instance; 483 vdc_t *vdc = NULL; 484 int status; 485 486 ASSERT(dip != NULL); 487 488 instance = ddi_get_instance(dip); 489 if (ddi_soft_state_zalloc(vdc_state, instance) != DDI_SUCCESS) { 490 cmn_err(CE_NOTE, "[%d] Couldn't alloc state structure", 491 instance); 492 return (DDI_FAILURE); 493 } 494 495 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 496 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 497 return (DDI_FAILURE); 498 } 499 500 /* 501 * We assign the value to initialized in this case to zero out the 502 * variable and then set bits in it to indicate what has been done 503 */ 504 vdc->initialized = VDC_SOFT_STATE; 505 506 vdc_hz_timeout = drv_usectohz(vdc_usec_timeout); 507 508 vdc_hz_min_ldc_delay = drv_usectohz(vdc_min_timeout_ldc); 509 vdc_hz_max_ldc_delay = drv_usectohz(vdc_max_timeout_ldc); 510 511 vdc->dip = dip; 512 vdc->instance = instance; 513 vdc->open_count = 0; 514 vdc->vdisk_type = VD_DISK_TYPE_UNK; 515 vdc->vdisk_label = VD_DISK_LABEL_UNK; 516 vdc->state = VDC_STATE_INIT; 517 vdc->lifecycle = VDC_LC_ATTACHING; 518 vdc->ldc_state = 0; 519 vdc->session_id = 0; 520 vdc->block_size = DEV_BSIZE; 521 vdc->max_xfer_sz = maxphys / DEV_BSIZE; 522 523 vdc->vtoc = NULL; 524 vdc->cinfo = NULL; 525 vdc->minfo = NULL; 526 527 mutex_init(&vdc->lock, NULL, MUTEX_DRIVER, NULL); 528 cv_init(&vdc->initwait_cv, NULL, CV_DRIVER, NULL); 529 cv_init(&vdc->dring_free_cv, NULL, CV_DRIVER, NULL); 530 cv_init(&vdc->membind_cv, NULL, CV_DRIVER, NULL); 531 cv_init(&vdc->running_cv, NULL, CV_DRIVER, NULL); 532 533 vdc->threads_pending = 0; 534 vdc->sync_op_pending = B_FALSE; 535 vdc->sync_op_blocked = B_FALSE; 536 cv_init(&vdc->sync_pending_cv, NULL, CV_DRIVER, NULL); 537 cv_init(&vdc->sync_blocked_cv, NULL, CV_DRIVER, NULL); 538 539 /* init blocking msg read functionality */ 540 mutex_init(&vdc->read_lock, NULL, MUTEX_DRIVER, NULL); 541 cv_init(&vdc->read_cv, NULL, CV_DRIVER, NULL); 542 vdc->read_state = VDC_READ_IDLE; 543 544 
vdc->initialized |= VDC_LOCKS; 545 546 /* initialise LDC channel which will be used to communicate with vds */ 547 if ((status = vdc_do_ldc_init(vdc)) != 0) { 548 cmn_err(CE_NOTE, "[%d] Couldn't initialize LDC", instance); 549 goto return_status; 550 } 551 552 /* initialize the thread responsible for managing state with server */ 553 vdc->msg_proc_thr = thread_create(NULL, 0, vdc_process_msg_thread, 554 vdc, 0, &p0, TS_RUN, minclsyspri); 555 if (vdc->msg_proc_thr == NULL) { 556 cmn_err(CE_NOTE, "[%d] Failed to create msg processing thread", 557 instance); 558 return (DDI_FAILURE); 559 } 560 561 vdc->initialized |= VDC_THREAD; 562 563 atomic_inc_32(&vdc_instance_count); 564 565 /* 566 * Once the handshake is complete, we can use the DRing to send 567 * requests to the vDisk server to calculate the geometry and 568 * VTOC of the "disk" 569 */ 570 status = vdc_setup_disk_layout(vdc); 571 if (status != 0) { 572 DMSG(vdc, 0, "[%d] Failed to discover disk layout (err%d)", 573 vdc->instance, status); 574 goto return_status; 575 } 576 577 /* 578 * Now that we have the device info we can create the 579 * device nodes and properties 580 */ 581 status = vdc_create_device_nodes(vdc); 582 if (status) { 583 DMSG(vdc, 0, "[%d] Failed to create device nodes", 584 instance); 585 goto return_status; 586 } 587 status = vdc_create_device_nodes_props(vdc); 588 if (status) { 589 DMSG(vdc, 0, "[%d] Failed to create device nodes" 590 " properties (%d)", instance, status); 591 goto return_status; 592 } 593 594 /* 595 * Setup devid 596 */ 597 if (vdc_setup_devid(vdc)) { 598 DMSG(vdc, 0, "[%d] No device id available\n", instance); 599 } 600 601 ddi_report_dev(dip); 602 vdc->lifecycle = VDC_LC_ONLINE; 603 DMSG(vdc, 0, "[%d] Attach tasks successful\n", instance); 604 605 return_status: 606 DMSG(vdc, 0, "[%d] Attach completed\n", instance); 607 return (status); 608 } 609 610 static int 611 vdc_attach(dev_info_t *dip, ddi_attach_cmd_t cmd) 612 { 613 int status; 614 615 switch (cmd) { 616 case 
DDI_ATTACH: 617 if ((status = vdc_do_attach(dip)) != 0) 618 (void) vdc_detach(dip, DDI_DETACH); 619 return (status); 620 case DDI_RESUME: 621 /* nothing to do for this non-device */ 622 return (DDI_SUCCESS); 623 default: 624 return (DDI_FAILURE); 625 } 626 } 627 628 static int 629 vdc_do_ldc_init(vdc_t *vdc) 630 { 631 int status = 0; 632 ldc_status_t ldc_state; 633 ldc_attr_t ldc_attr; 634 uint64_t ldc_id = 0; 635 dev_info_t *dip = NULL; 636 637 ASSERT(vdc != NULL); 638 639 dip = vdc->dip; 640 vdc->initialized |= VDC_LDC; 641 642 if ((status = vdc_get_ldc_id(dip, &ldc_id)) != 0) { 643 DMSG(vdc, 0, "[%d] Failed to get LDC channel ID property", 644 vdc->instance); 645 return (EIO); 646 } 647 vdc->ldc_id = ldc_id; 648 649 ldc_attr.devclass = LDC_DEV_BLK; 650 ldc_attr.instance = vdc->instance; 651 ldc_attr.mode = LDC_MODE_UNRELIABLE; /* unreliable transport */ 652 ldc_attr.mtu = VD_LDC_MTU; 653 654 if ((vdc->initialized & VDC_LDC_INIT) == 0) { 655 status = ldc_init(ldc_id, &ldc_attr, &vdc->ldc_handle); 656 if (status != 0) { 657 DMSG(vdc, 0, "[%d] ldc_init(chan %ld) returned %d", 658 vdc->instance, ldc_id, status); 659 return (status); 660 } 661 vdc->initialized |= VDC_LDC_INIT; 662 } 663 status = ldc_status(vdc->ldc_handle, &ldc_state); 664 if (status != 0) { 665 DMSG(vdc, 0, "[%d] Cannot discover LDC status [err=%d]", 666 vdc->instance, status); 667 return (status); 668 } 669 vdc->ldc_state = ldc_state; 670 671 if ((vdc->initialized & VDC_LDC_CB) == 0) { 672 status = ldc_reg_callback(vdc->ldc_handle, vdc_handle_cb, 673 (caddr_t)vdc); 674 if (status != 0) { 675 DMSG(vdc, 0, "[%d] LDC callback reg. failed (%d)", 676 vdc->instance, status); 677 return (status); 678 } 679 vdc->initialized |= VDC_LDC_CB; 680 } 681 682 vdc->initialized |= VDC_LDC; 683 684 /* 685 * At this stage we have initialised LDC, we will now try and open 686 * the connection. 
687 */ 688 if (vdc->ldc_state == LDC_INIT) { 689 status = ldc_open(vdc->ldc_handle); 690 if (status != 0) { 691 DMSG(vdc, 0, "[%d] ldc_open(chan %ld) returned %d", 692 vdc->instance, vdc->ldc_id, status); 693 return (status); 694 } 695 vdc->initialized |= VDC_LDC_OPEN; 696 } 697 698 return (status); 699 } 700 701 static int 702 vdc_start_ldc_connection(vdc_t *vdc) 703 { 704 int status = 0; 705 706 ASSERT(vdc != NULL); 707 708 ASSERT(MUTEX_HELD(&vdc->lock)); 709 710 status = vdc_do_ldc_up(vdc); 711 712 DMSG(vdc, 0, "[%d] Finished bringing up LDC\n", vdc->instance); 713 714 return (status); 715 } 716 717 static int 718 vdc_stop_ldc_connection(vdc_t *vdcp) 719 { 720 int status; 721 722 DMSG(vdcp, 0, ": Resetting connection to vDisk server : state %d\n", 723 vdcp->state); 724 725 status = ldc_down(vdcp->ldc_handle); 726 DMSG(vdcp, 0, "ldc_down() = %d\n", status); 727 728 vdcp->initialized &= ~VDC_HANDSHAKE; 729 DMSG(vdcp, 0, "initialized=%x\n", vdcp->initialized); 730 731 return (status); 732 } 733 734 static int 735 vdc_create_device_nodes_efi(vdc_t *vdc) 736 { 737 ddi_remove_minor_node(vdc->dip, "h"); 738 ddi_remove_minor_node(vdc->dip, "h,raw"); 739 740 if (ddi_create_minor_node(vdc->dip, "wd", S_IFBLK, 741 VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 742 DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 743 cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd'", 744 vdc->instance); 745 return (EIO); 746 } 747 748 /* if any device node is created we set this flag */ 749 vdc->initialized |= VDC_MINOR; 750 751 if (ddi_create_minor_node(vdc->dip, "wd,raw", S_IFCHR, 752 VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 753 DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 754 cmn_err(CE_NOTE, "[%d] Couldn't add block node 'wd,raw'", 755 vdc->instance); 756 return (EIO); 757 } 758 759 return (0); 760 } 761 762 static int 763 vdc_create_device_nodes_vtoc(vdc_t *vdc) 764 { 765 ddi_remove_minor_node(vdc->dip, "wd"); 766 ddi_remove_minor_node(vdc->dip, "wd,raw"); 767 768 if (ddi_create_minor_node(vdc->dip, "h", 
S_IFBLK, 769 VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 770 DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 771 cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h'", 772 vdc->instance); 773 return (EIO); 774 } 775 776 /* if any device node is created we set this flag */ 777 vdc->initialized |= VDC_MINOR; 778 779 if (ddi_create_minor_node(vdc->dip, "h,raw", S_IFCHR, 780 VD_MAKE_DEV(vdc->instance, VD_EFI_WD_SLICE), 781 DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 782 cmn_err(CE_NOTE, "[%d] Couldn't add block node 'h,raw'", 783 vdc->instance); 784 return (EIO); 785 } 786 787 return (0); 788 } 789 790 /* 791 * Function: 792 * vdc_create_device_nodes 793 * 794 * Description: 795 * This function creates the block and character device nodes under 796 * /devices along with the node properties. It is called as part of 797 * the attach(9E) of the instance during the handshake with vds after 798 * vds has sent the attributes to vdc. 799 * 800 * If the device is of type VD_DISK_TYPE_SLICE then the minor node 801 * of 2 is used in keeping with the Solaris convention that slice 2 802 * refers to a whole disk. 
Slices start at 'a' 803 * 804 * Parameters: 805 * vdc - soft state pointer 806 * 807 * Return Values 808 * 0 - Success 809 * EIO - Failed to create node 810 * EINVAL - Unknown type of disk exported 811 */ 812 static int 813 vdc_create_device_nodes(vdc_t *vdc) 814 { 815 char name[sizeof ("s,raw")]; 816 dev_info_t *dip = NULL; 817 int instance, status; 818 int num_slices = 1; 819 int i; 820 821 ASSERT(vdc != NULL); 822 823 instance = vdc->instance; 824 dip = vdc->dip; 825 826 switch (vdc->vdisk_type) { 827 case VD_DISK_TYPE_DISK: 828 num_slices = V_NUMPAR; 829 break; 830 case VD_DISK_TYPE_SLICE: 831 num_slices = 1; 832 break; 833 case VD_DISK_TYPE_UNK: 834 default: 835 return (EINVAL); 836 } 837 838 /* 839 * Minor nodes are different for EFI disks: EFI disks do not have 840 * a minor node 'g' for the minor number corresponding to slice 841 * VD_EFI_WD_SLICE (slice 7) instead they have a minor node 'wd' 842 * representing the whole disk. 843 */ 844 for (i = 0; i < num_slices; i++) { 845 846 if (i == VD_EFI_WD_SLICE) { 847 if (vdc->vdisk_label == VD_DISK_LABEL_EFI) 848 status = vdc_create_device_nodes_efi(vdc); 849 else 850 status = vdc_create_device_nodes_vtoc(vdc); 851 if (status != 0) 852 return (status); 853 continue; 854 } 855 856 (void) snprintf(name, sizeof (name), "%c", 'a' + i); 857 if (ddi_create_minor_node(dip, name, S_IFBLK, 858 VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 859 cmn_err(CE_NOTE, "[%d] Couldn't add block node '%s'", 860 instance, name); 861 return (EIO); 862 } 863 864 /* if any device node is created we set this flag */ 865 vdc->initialized |= VDC_MINOR; 866 867 (void) snprintf(name, sizeof (name), "%c%s", 868 'a' + i, ",raw"); 869 if (ddi_create_minor_node(dip, name, S_IFCHR, 870 VD_MAKE_DEV(instance, i), DDI_NT_BLOCK, 0) != DDI_SUCCESS) { 871 cmn_err(CE_NOTE, "[%d] Couldn't add raw node '%s'", 872 instance, name); 873 return (EIO); 874 } 875 } 876 877 return (0); 878 } 879 880 /* 881 * Function: 882 * 
vdc_create_device_nodes_props 883 * 884 * Description: 885 * This function creates the block and character device nodes under 886 * /devices along with the node properties. It is called as part of 887 * the attach(9E) of the instance during the handshake with vds after 888 * vds has sent the attributes to vdc. 889 * 890 * Parameters: 891 * vdc - soft state pointer 892 * 893 * Return Values 894 * 0 - Success 895 * EIO - Failed to create device node property 896 * EINVAL - Unknown type of disk exported 897 */ 898 static int 899 vdc_create_device_nodes_props(vdc_t *vdc) 900 { 901 dev_info_t *dip = NULL; 902 int instance; 903 int num_slices = 1; 904 int64_t size = 0; 905 dev_t dev; 906 int rv; 907 int i; 908 909 ASSERT(vdc != NULL); 910 911 instance = vdc->instance; 912 dip = vdc->dip; 913 914 if ((vdc->vtoc == NULL) || (vdc->vtoc->v_sanity != VTOC_SANE)) { 915 DMSG(vdc, 0, "![%d] Could not create device node property." 916 " No VTOC available", instance); 917 return (ENXIO); 918 } 919 920 switch (vdc->vdisk_type) { 921 case VD_DISK_TYPE_DISK: 922 num_slices = V_NUMPAR; 923 break; 924 case VD_DISK_TYPE_SLICE: 925 num_slices = 1; 926 break; 927 case VD_DISK_TYPE_UNK: 928 default: 929 return (EINVAL); 930 } 931 932 for (i = 0; i < num_slices; i++) { 933 dev = makedevice(ddi_driver_major(dip), 934 VD_MAKE_DEV(instance, i)); 935 936 size = vdc->vtoc->v_part[i].p_size * vdc->vtoc->v_sectorsz; 937 DMSG(vdc, 0, "[%d] sz %ld (%ld Mb) p_size %lx\n", 938 instance, size, size / (1024 * 1024), 939 vdc->vtoc->v_part[i].p_size); 940 941 rv = ddi_prop_update_int64(dev, dip, VDC_SIZE_PROP_NAME, size); 942 if (rv != DDI_PROP_SUCCESS) { 943 cmn_err(CE_NOTE, "[%d] Couldn't add '%s' prop of [%ld]", 944 instance, VDC_SIZE_PROP_NAME, size); 945 return (EIO); 946 } 947 948 rv = ddi_prop_update_int64(dev, dip, VDC_NBLOCKS_PROP_NAME, 949 lbtodb(size)); 950 if (rv != DDI_PROP_SUCCESS) { 951 cmn_err(CE_NOTE, "[%d] Couldn't add '%s' prop [%llu]", 952 instance, VDC_NBLOCKS_PROP_NAME, 
lbtodb(size)); 953 return (EIO); 954 } 955 } 956 957 return (0); 958 } 959 960 static int 961 vdc_open(dev_t *dev, int flag, int otyp, cred_t *cred) 962 { 963 _NOTE(ARGUNUSED(cred)) 964 965 int instance; 966 vdc_t *vdc; 967 968 ASSERT(dev != NULL); 969 instance = VDCUNIT(*dev); 970 971 if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK)) 972 return (EINVAL); 973 974 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 975 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 976 return (ENXIO); 977 } 978 979 DMSG(vdc, 0, "minor = %d flag = %x, otyp = %x\n", 980 getminor(*dev), flag, otyp); 981 982 mutex_enter(&vdc->lock); 983 vdc->open_count++; 984 mutex_exit(&vdc->lock); 985 986 return (0); 987 } 988 989 static int 990 vdc_close(dev_t dev, int flag, int otyp, cred_t *cred) 991 { 992 _NOTE(ARGUNUSED(cred)) 993 994 int instance; 995 vdc_t *vdc; 996 997 instance = VDCUNIT(dev); 998 999 if ((otyp != OTYP_CHR) && (otyp != OTYP_BLK)) 1000 return (EINVAL); 1001 1002 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1003 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 1004 return (ENXIO); 1005 } 1006 1007 DMSG(vdc, 0, "[%d] flag = %x, otyp = %x\n", instance, flag, otyp); 1008 if (vdc->dkio_flush_pending) { 1009 DMSG(vdc, 0, 1010 "[%d] Cannot detach: %d outstanding DKIO flushes\n", 1011 instance, vdc->dkio_flush_pending); 1012 return (EBUSY); 1013 } 1014 1015 /* 1016 * Should not need the mutex here, since the framework should protect 1017 * against more opens on this device, but just in case. 
1018 */ 1019 mutex_enter(&vdc->lock); 1020 vdc->open_count--; 1021 mutex_exit(&vdc->lock); 1022 1023 return (0); 1024 } 1025 1026 static int 1027 vdc_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, int *rvalp) 1028 { 1029 _NOTE(ARGUNUSED(credp)) 1030 _NOTE(ARGUNUSED(rvalp)) 1031 1032 return (vd_process_ioctl(dev, cmd, (caddr_t)arg, mode)); 1033 } 1034 1035 static int 1036 vdc_print(dev_t dev, char *str) 1037 { 1038 cmn_err(CE_NOTE, "vdc%d: %s", VDCUNIT(dev), str); 1039 return (0); 1040 } 1041 1042 static int 1043 vdc_dump(dev_t dev, caddr_t addr, daddr_t blkno, int nblk) 1044 { 1045 int rv; 1046 size_t nbytes = nblk * DEV_BSIZE; 1047 int instance = VDCUNIT(dev); 1048 vdc_t *vdc = NULL; 1049 1050 if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) { 1051 cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance); 1052 return (ENXIO); 1053 } 1054 1055 DMSG(vdc, 2, "[%d] dump %ld bytes at block 0x%lx : addr=0x%p\n", 1056 instance, nbytes, blkno, (void *)addr); 1057 rv = vdc_send_request(vdc, VD_OP_BWRITE, addr, nbytes, 1058 VDCPART(dev), blkno, CB_STRATEGY, 0, VIO_write_dir); 1059 if (rv) { 1060 DMSG(vdc, 0, "Failed to do a disk dump (err=%d)\n", rv); 1061 return (rv); 1062 } 1063 1064 if (ddi_in_panic()) 1065 (void) vdc_drain_response(vdc); 1066 1067 DMSG(vdc, 0, "[%d] End\n", instance); 1068 1069 return (0); 1070 } 1071 1072 /* -------------------------------------------------------------------------- */ 1073 1074 /* 1075 * Disk access routines 1076 * 1077 */ 1078 1079 /* 1080 * vdc_strategy() 1081 * 1082 * Return Value: 1083 * 0: As per strategy(9E), the strategy() function must return 0 1084 * [ bioerror(9f) sets b_flags to the proper error code ] 1085 */ 1086 static int 1087 vdc_strategy(struct buf *buf) 1088 { 1089 int rv = -1; 1090 vdc_t *vdc = NULL; 1091 int instance = VDCUNIT(buf->b_edev); 1092 int op = (buf->b_flags & B_READ) ? 
	    VD_OP_BREAD : VD_OP_BWRITE;

	if ((vdc = ddi_get_soft_state(vdc_state, instance)) == NULL) {
		cmn_err(CE_NOTE, "[%d] Couldn't get state structure", instance);
		bioerror(buf, ENXIO);
		biodone(buf);
		return (0);
	}

	DMSG(vdc, 2, "[%d] %s %ld bytes at block %llx : b_addr=0x%p\n",
	    instance, (buf->b_flags & B_READ) ? "Read" : "Write",
	    buf->b_bcount, buf->b_lblkno, (void *)buf->b_un.b_addr);
	DTRACE_IO2(vstart, buf_t *, buf, vdc_t *, vdc);

	/* make sure the buffer's data is kernel-addressable before mapping */
	bp_mapin(buf);

	rv = vdc_send_request(vdc, op, (caddr_t)buf->b_un.b_addr,
	    buf->b_bcount, VDCPART(buf->b_edev), buf->b_lblkno,
	    CB_STRATEGY, buf, (op == VD_OP_BREAD) ? VIO_read_dir :
	    VIO_write_dir);

	ASSERT(rv == 0 || rv == EINVAL);

	/*
	 * If the request was successfully sent, the strategy call returns and
	 * the ACK handler calls the bioxxx functions when the vDisk server is
	 * done.
	 */
	if (rv) {
		DMSG(vdc, 0, "Failed to read/write (err=%d)\n", rv);
		bioerror(buf, rv);
		biodone(buf);
	}

	return (0);
}

/*
 * Function:
 *	vdc_min
 *
 * Description:
 *	Routine to limit the size of a data transfer. Used in
 *	conjunction with physio(9F).
 *
 * Arguments:
 *	bufp	- pointer to the indicated buf(9S) struct.
 *
 */
static void
vdc_min(struct buf *bufp)
{
	vdc_t	*vdc = NULL;
	int	instance = VDCUNIT(bufp->b_edev);

	vdc = ddi_get_soft_state(vdc_state, instance);
	VERIFY(vdc != NULL);

	/* clamp the transfer to the maximum size negotiated with vds */
	if (bufp->b_bcount > (vdc->max_xfer_sz * vdc->block_size)) {
		bufp->b_bcount = vdc->max_xfer_sz * vdc->block_size;
	}
}

/*
 * vdc_read()
 *
 * Driver read(9E) entry point: character-device read via physio(9F),
 * bounded by vdc_min() and serviced by vdc_strategy().
 */
static int
vdc_read(dev_t dev, struct uio *uio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	DMSGX(1, "[%d] Entered", VDCUNIT(dev));
	return (physio(vdc_strategy, NULL, dev, B_READ, vdc_min, uio));
}

/*
 * vdc_write()
 *
 * Driver write(9E) entry point: character-device write via physio(9F).
 */
static int
vdc_write(dev_t dev, struct uio *uio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	DMSGX(1, "[%d] Entered", VDCUNIT(dev));
	return (physio(vdc_strategy, NULL, dev, B_WRITE, vdc_min, uio));
}

/*
 * vdc_aread()
 *
 * Driver aread(9E) entry point: asynchronous read via aphysio(9F).
 */
static int
vdc_aread(dev_t dev, struct aio_req *aio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	DMSGX(1, "[%d] Entered", VDCUNIT(dev));
	return (aphysio(vdc_strategy, anocancel, dev, B_READ, vdc_min, aio));
}

/*
 * vdc_awrite()
 *
 * Driver awrite(9E) entry point: asynchronous write via aphysio(9F).
 */
static int
vdc_awrite(dev_t dev, struct aio_req *aio, cred_t *cred)
{
	_NOTE(ARGUNUSED(cred))

	DMSGX(1, "[%d] Entered", VDCUNIT(dev));
	return (aphysio(vdc_strategy, anocancel, dev, B_WRITE, vdc_min, aio));
}


/* -------------------------------------------------------------------------- */

/*
 * Handshake support
 */


/*
 * Function:
 *	vdc_init_ver_negotiation()
 *
 * Description:
 *	Build and send a VIO_VER_INFO message proposing the given protocol
 *	version to the vDisk server.  Also picks a fresh session ID for
 *	this handshake.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
1207 * 1208 * Return Code: 1209 * 0 - Success 1210 */ 1211 static int 1212 vdc_init_ver_negotiation(vdc_t *vdc, vio_ver_t ver) 1213 { 1214 vio_ver_msg_t pkt; 1215 size_t msglen = sizeof (pkt); 1216 int status = -1; 1217 1218 ASSERT(vdc != NULL); 1219 ASSERT(mutex_owned(&vdc->lock)); 1220 1221 DMSG(vdc, 0, "[%d] Entered.\n", vdc->instance); 1222 1223 /* 1224 * set the Session ID to a unique value 1225 * (the lower 32 bits of the clock tick) 1226 */ 1227 vdc->session_id = ((uint32_t)gettick() & 0xffffffff); 1228 DMSG(vdc, 0, "[%d] Set SID to 0x%lx\n", vdc->instance, vdc->session_id); 1229 1230 pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 1231 pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 1232 pkt.tag.vio_subtype_env = VIO_VER_INFO; 1233 pkt.tag.vio_sid = vdc->session_id; 1234 pkt.dev_class = VDEV_DISK; 1235 pkt.ver_major = ver.major; 1236 pkt.ver_minor = ver.minor; 1237 1238 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 1239 DMSG(vdc, 0, "[%d] Ver info sent (status = %d)\n", 1240 vdc->instance, status); 1241 if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) { 1242 DMSG(vdc, 0, "[%d] Failed to send Ver negotiation info: " 1243 "id(%lx) rv(%d) size(%ld)", 1244 vdc->instance, vdc->ldc_handle, 1245 status, msglen); 1246 if (msglen != sizeof (vio_ver_msg_t)) 1247 status = ENOMSG; 1248 } 1249 1250 return (status); 1251 } 1252 1253 /* 1254 * Function: 1255 * vdc_ver_negotiation() 1256 * 1257 * Description: 1258 * 1259 * Arguments: 1260 * vdcp - soft state pointer for this instance of the device driver. 
1261 * 1262 * Return Code: 1263 * 0 - Success 1264 */ 1265 static int 1266 vdc_ver_negotiation(vdc_t *vdcp) 1267 { 1268 vio_msg_t vio_msg; 1269 int status; 1270 1271 if (status = vdc_init_ver_negotiation(vdcp, vdc_version[0])) 1272 return (status); 1273 1274 /* release lock and wait for response */ 1275 mutex_exit(&vdcp->lock); 1276 status = vdc_wait_for_response(vdcp, &vio_msg); 1277 mutex_enter(&vdcp->lock); 1278 if (status) { 1279 DMSG(vdcp, 0, 1280 "[%d] Failed waiting for Ver negotiation response, rv(%d)", 1281 vdcp->instance, status); 1282 return (status); 1283 } 1284 1285 /* check type and sub_type ... */ 1286 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 1287 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 1288 DMSG(vdcp, 0, "[%d] Invalid ver negotiation response\n", 1289 vdcp->instance); 1290 return (EPROTO); 1291 } 1292 1293 return (vdc_handle_ver_msg(vdcp, (vio_ver_msg_t *)&vio_msg)); 1294 } 1295 1296 /* 1297 * Function: 1298 * vdc_init_attr_negotiation() 1299 * 1300 * Description: 1301 * 1302 * Arguments: 1303 * vdc - soft state pointer for this instance of the device driver. 
1304 * 1305 * Return Code: 1306 * 0 - Success 1307 */ 1308 static int 1309 vdc_init_attr_negotiation(vdc_t *vdc) 1310 { 1311 vd_attr_msg_t pkt; 1312 size_t msglen = sizeof (pkt); 1313 int status; 1314 1315 ASSERT(vdc != NULL); 1316 ASSERT(mutex_owned(&vdc->lock)); 1317 1318 DMSG(vdc, 0, "[%d] entered\n", vdc->instance); 1319 1320 /* fill in tag */ 1321 pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 1322 pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 1323 pkt.tag.vio_subtype_env = VIO_ATTR_INFO; 1324 pkt.tag.vio_sid = vdc->session_id; 1325 /* fill in payload */ 1326 pkt.max_xfer_sz = vdc->max_xfer_sz; 1327 pkt.vdisk_block_size = vdc->block_size; 1328 pkt.xfer_mode = VIO_DRING_MODE; 1329 pkt.operations = 0; /* server will set bits of valid operations */ 1330 pkt.vdisk_type = 0; /* server will set to valid device type */ 1331 pkt.vdisk_size = 0; /* server will set to valid size */ 1332 1333 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 1334 DMSG(vdc, 0, "Attr info sent (status = %d)\n", status); 1335 1336 if ((status != 0) || (msglen != sizeof (vio_ver_msg_t))) { 1337 DMSG(vdc, 0, "[%d] Failed to send Attr negotiation info: " 1338 "id(%lx) rv(%d) size(%ld)", 1339 vdc->instance, vdc->ldc_handle, 1340 status, msglen); 1341 if (msglen != sizeof (vio_ver_msg_t)) 1342 status = ENOMSG; 1343 } 1344 1345 return (status); 1346 } 1347 1348 /* 1349 * Function: 1350 * vdc_attr_negotiation() 1351 * 1352 * Description: 1353 * 1354 * Arguments: 1355 * vdc - soft state pointer for this instance of the device driver. 
1356 * 1357 * Return Code: 1358 * 0 - Success 1359 */ 1360 static int 1361 vdc_attr_negotiation(vdc_t *vdcp) 1362 { 1363 int status; 1364 vio_msg_t vio_msg; 1365 1366 if (status = vdc_init_attr_negotiation(vdcp)) 1367 return (status); 1368 1369 /* release lock and wait for response */ 1370 mutex_exit(&vdcp->lock); 1371 status = vdc_wait_for_response(vdcp, &vio_msg); 1372 mutex_enter(&vdcp->lock); 1373 if (status) { 1374 DMSG(vdcp, 0, 1375 "[%d] Failed waiting for Attr negotiation response, rv(%d)", 1376 vdcp->instance, status); 1377 return (status); 1378 } 1379 1380 /* check type and sub_type ... */ 1381 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 1382 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 1383 DMSG(vdcp, 0, "[%d] Invalid attr negotiation response\n", 1384 vdcp->instance); 1385 return (EPROTO); 1386 } 1387 1388 return (vdc_handle_attr_msg(vdcp, (vd_attr_msg_t *)&vio_msg)); 1389 } 1390 1391 1392 /* 1393 * Function: 1394 * vdc_init_dring_negotiate() 1395 * 1396 * Description: 1397 * 1398 * Arguments: 1399 * vdc - soft state pointer for this instance of the device driver. 
1400 * 1401 * Return Code: 1402 * 0 - Success 1403 */ 1404 static int 1405 vdc_init_dring_negotiate(vdc_t *vdc) 1406 { 1407 vio_dring_reg_msg_t pkt; 1408 size_t msglen = sizeof (pkt); 1409 int status = -1; 1410 int retry; 1411 int nretries = 10; 1412 1413 ASSERT(vdc != NULL); 1414 ASSERT(mutex_owned(&vdc->lock)); 1415 1416 for (retry = 0; retry < nretries; retry++) { 1417 status = vdc_init_descriptor_ring(vdc); 1418 if (status != EAGAIN) 1419 break; 1420 drv_usecwait(vdc_min_timeout_ldc); 1421 } 1422 1423 if (status != 0) { 1424 DMSG(vdc, 0, "[%d] Failed to init DRing (status = %d)\n", 1425 vdc->instance, status); 1426 return (status); 1427 } 1428 1429 DMSG(vdc, 0, "[%d] Init of descriptor ring completed (status = %d)\n", 1430 vdc->instance, status); 1431 1432 /* fill in tag */ 1433 pkt.tag.vio_msgtype = VIO_TYPE_CTRL; 1434 pkt.tag.vio_subtype = VIO_SUBTYPE_INFO; 1435 pkt.tag.vio_subtype_env = VIO_DRING_REG; 1436 pkt.tag.vio_sid = vdc->session_id; 1437 /* fill in payload */ 1438 pkt.dring_ident = 0; 1439 pkt.num_descriptors = vdc->dring_len; 1440 pkt.descriptor_size = vdc->dring_entry_size; 1441 pkt.options = (VIO_TX_DRING | VIO_RX_DRING); 1442 pkt.ncookies = vdc->dring_cookie_count; 1443 pkt.cookie[0] = vdc->dring_cookie[0]; /* for now just one cookie */ 1444 1445 status = vdc_send(vdc, (caddr_t)&pkt, &msglen); 1446 if (status != 0) { 1447 DMSG(vdc, 0, "[%d] Failed to register DRing (err = %d)", 1448 vdc->instance, status); 1449 } 1450 1451 return (status); 1452 } 1453 1454 1455 /* 1456 * Function: 1457 * vdc_dring_negotiation() 1458 * 1459 * Description: 1460 * 1461 * Arguments: 1462 * vdc - soft state pointer for this instance of the device driver. 
1463 * 1464 * Return Code: 1465 * 0 - Success 1466 */ 1467 static int 1468 vdc_dring_negotiation(vdc_t *vdcp) 1469 { 1470 int status; 1471 vio_msg_t vio_msg; 1472 1473 if (status = vdc_init_dring_negotiate(vdcp)) 1474 return (status); 1475 1476 /* release lock and wait for response */ 1477 mutex_exit(&vdcp->lock); 1478 status = vdc_wait_for_response(vdcp, &vio_msg); 1479 mutex_enter(&vdcp->lock); 1480 if (status) { 1481 DMSG(vdcp, 0, 1482 "[%d] Failed waiting for Dring negotiation response," 1483 " rv(%d)", vdcp->instance, status); 1484 return (status); 1485 } 1486 1487 /* check type and sub_type ... */ 1488 if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL || 1489 vio_msg.tag.vio_subtype == VIO_SUBTYPE_INFO) { 1490 DMSG(vdcp, 0, "[%d] Invalid Dring negotiation response\n", 1491 vdcp->instance); 1492 return (EPROTO); 1493 } 1494 1495 return (vdc_handle_dring_reg_msg(vdcp, 1496 (vio_dring_reg_msg_t *)&vio_msg)); 1497 } 1498 1499 1500 /* 1501 * Function: 1502 * vdc_send_rdx() 1503 * 1504 * Description: 1505 * 1506 * Arguments: 1507 * vdc - soft state pointer for this instance of the device driver. 1508 * 1509 * Return Code: 1510 * 0 - Success 1511 */ 1512 static int 1513 vdc_send_rdx(vdc_t *vdcp) 1514 { 1515 vio_msg_t msg; 1516 size_t msglen = sizeof (vio_msg_t); 1517 int status; 1518 1519 /* 1520 * Send an RDX message to vds to indicate we are ready 1521 * to send data 1522 */ 1523 msg.tag.vio_msgtype = VIO_TYPE_CTRL; 1524 msg.tag.vio_subtype = VIO_SUBTYPE_INFO; 1525 msg.tag.vio_subtype_env = VIO_RDX; 1526 msg.tag.vio_sid = vdcp->session_id; 1527 status = vdc_send(vdcp, (caddr_t)&msg, &msglen); 1528 if (status != 0) { 1529 DMSG(vdcp, 0, "[%d] Failed to send RDX message (%d)", 1530 vdcp->instance, status); 1531 } 1532 1533 return (status); 1534 } 1535 1536 /* 1537 * Function: 1538 * vdc_handle_rdx() 1539 * 1540 * Description: 1541 * 1542 * Arguments: 1543 * vdc - soft state pointer for this instance of the device driver. 
 *	msgp	- received msg
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_handle_rdx(vdc_t *vdcp, vio_rdx_msg_t *msgp)
{
	/*
	 * Both args are only referenced by ASSERT/DMSG, which compile
	 * away in non-DEBUG builds -- hence the ARGUNUSED annotations.
	 */
	_NOTE(ARGUNUSED(vdcp))
	_NOTE(ARGUNUSED(msgp))

	ASSERT(msgp->tag.vio_msgtype == VIO_TYPE_CTRL);
	ASSERT(msgp->tag.vio_subtype == VIO_SUBTYPE_ACK);
	ASSERT(msgp->tag.vio_subtype_env == VIO_RDX);

	DMSG(vdcp, 1, "[%d] Got an RDX msg", vdcp->instance);

	return (0);
}

/*
 * Function:
 *	vdc_rdx_exchange()
 *
 * Description:
 *	Final phase of the handshake: send RDX, drop vdcp->lock while
 *	waiting for the server's ACK, then validate the reply.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	0	- Success
 */
static int
vdc_rdx_exchange(vdc_t *vdcp)
{
	int status;
	vio_msg_t vio_msg;

	if (status = vdc_send_rdx(vdcp))
		return (status);

	/* release lock and wait for response */
	mutex_exit(&vdcp->lock);
	status = vdc_wait_for_response(vdcp, &vio_msg);
	mutex_enter(&vdcp->lock);
	if (status) {
		DMSG(vdcp, 0,
		    "[%d] Failed waiting for RDX response,"
		    " rv(%d)", vdcp->instance, status);
		return (status);
	}

	/* check type and sub_type ...
	 */
	if (vio_msg.tag.vio_msgtype != VIO_TYPE_CTRL ||
	    vio_msg.tag.vio_subtype != VIO_SUBTYPE_ACK) {
		DMSG(vdcp, 0, "[%d] Invalid RDX response\n",
		    vdcp->instance);
		return (EPROTO);
	}

	return (vdc_handle_rdx(vdcp, (vio_rdx_msg_t *)&vio_msg));
}


/* -------------------------------------------------------------------------- */

/*
 * LDC helper routines
 */

/*
 * vdc_recv()
 *
 * Wait (on read_cv/read_lock) until the callback marks data pending,
 * then poll ldc_read() until a complete message arrives.  Returns
 * ECONNRESET if the channel is reset while waiting.  On success,
 * *nbytesp is updated with the number of bytes actually read.
 */
static int
vdc_recv(vdc_t *vdc, vio_msg_t *msgp, size_t *nbytesp)
{
	int		status;
	boolean_t	q_has_pkts = B_FALSE;
	int		delay_time;
	size_t		len;

	mutex_enter(&vdc->read_lock);

	if (vdc->read_state == VDC_READ_IDLE)
		vdc->read_state = VDC_READ_WAITING;

	while (vdc->read_state != VDC_READ_PENDING) {

		/* detect if the connection has been reset */
		if (vdc->read_state == VDC_READ_RESET) {
			status = ECONNRESET;
			goto done;
		}

		cv_wait(&vdc->read_cv, &vdc->read_lock);
	}

	/*
	 * Until we get a blocking ldc read we have to retry
	 * until the entire LDC message has arrived before
	 * ldc_read() will succeed. Note we also bail out if
	 * the channel is reset or goes away.
	 */
	delay_time = vdc_ldc_read_init_delay;
loop:
	len = *nbytesp;
	status = ldc_read(vdc->ldc_handle, (caddr_t)msgp, &len);
	switch (status) {
	case EAGAIN:
		/* geometric backoff, capped at vdc_ldc_read_max_delay */
		delay_time *= 2;
		if (delay_time >= vdc_ldc_read_max_delay)
			delay_time = vdc_ldc_read_max_delay;
		delay(delay_time);
		goto loop;

	case 0:
		if (len == 0) {
			DMSG(vdc, 0, "[%d] ldc_read returned 0 bytes with "
			    "no error!\n", vdc->instance);
			goto loop;
		}

		*nbytesp = len;

		/*
		 * If there are pending messages, leave the
		 * read state as pending. Otherwise, set the state
		 * back to idle.
1669 */ 1670 status = ldc_chkq(vdc->ldc_handle, &q_has_pkts); 1671 if (status == 0 && !q_has_pkts) 1672 vdc->read_state = VDC_READ_IDLE; 1673 1674 break; 1675 default: 1676 DMSG(vdc, 0, "ldc_read returned %d\n", status); 1677 break; 1678 } 1679 1680 done: 1681 mutex_exit(&vdc->read_lock); 1682 1683 return (status); 1684 } 1685 1686 1687 1688 #ifdef DEBUG 1689 void 1690 vdc_decode_tag(vdc_t *vdcp, vio_msg_t *msg) 1691 { 1692 char *ms, *ss, *ses; 1693 switch (msg->tag.vio_msgtype) { 1694 #define Q(_s) case _s : ms = #_s; break; 1695 Q(VIO_TYPE_CTRL) 1696 Q(VIO_TYPE_DATA) 1697 Q(VIO_TYPE_ERR) 1698 #undef Q 1699 default: ms = "unknown"; break; 1700 } 1701 1702 switch (msg->tag.vio_subtype) { 1703 #define Q(_s) case _s : ss = #_s; break; 1704 Q(VIO_SUBTYPE_INFO) 1705 Q(VIO_SUBTYPE_ACK) 1706 Q(VIO_SUBTYPE_NACK) 1707 #undef Q 1708 default: ss = "unknown"; break; 1709 } 1710 1711 switch (msg->tag.vio_subtype_env) { 1712 #define Q(_s) case _s : ses = #_s; break; 1713 Q(VIO_VER_INFO) 1714 Q(VIO_ATTR_INFO) 1715 Q(VIO_DRING_REG) 1716 Q(VIO_DRING_UNREG) 1717 Q(VIO_RDX) 1718 Q(VIO_PKT_DATA) 1719 Q(VIO_DESC_DATA) 1720 Q(VIO_DRING_DATA) 1721 #undef Q 1722 default: ses = "unknown"; break; 1723 } 1724 1725 DMSG(vdcp, 3, "(%x/%x/%x) message : (%s/%s/%s)\n", 1726 msg->tag.vio_msgtype, msg->tag.vio_subtype, 1727 msg->tag.vio_subtype_env, ms, ss, ses); 1728 } 1729 #endif 1730 1731 /* 1732 * Function: 1733 * vdc_send() 1734 * 1735 * Description: 1736 * The function encapsulates the call to write a message using LDC. 1737 * If LDC indicates that the call failed due to the queue being full, 1738 * we retry the ldc_write() [ up to 'vdc_retries' time ], otherwise 1739 * we return the error returned by LDC. 1740 * 1741 * Arguments: 1742 * ldc_handle - LDC handle for the channel this instance of vdc uses 1743 * pkt - address of LDC message to be sent 1744 * msglen - the size of the message being sent. When the function 1745 * returns, this contains the number of bytes written. 
1746 * 1747 * Return Code: 1748 * 0 - Success. 1749 * EINVAL - pkt or msglen were NULL 1750 * ECONNRESET - The connection was not up. 1751 * EWOULDBLOCK - LDC queue is full 1752 * xxx - other error codes returned by ldc_write 1753 */ 1754 static int 1755 vdc_send(vdc_t *vdc, caddr_t pkt, size_t *msglen) 1756 { 1757 size_t size = 0; 1758 int status = 0; 1759 clock_t delay_ticks; 1760 1761 ASSERT(vdc != NULL); 1762 ASSERT(mutex_owned(&vdc->lock)); 1763 ASSERT(msglen != NULL); 1764 ASSERT(*msglen != 0); 1765 1766 #ifdef DEBUG 1767 vdc_decode_tag(vdc, (vio_msg_t *)pkt); 1768 #endif 1769 /* 1770 * Wait indefinitely to send if channel 1771 * is busy, but bail out if we succeed or 1772 * if the channel closes or is reset. 1773 */ 1774 delay_ticks = vdc_hz_min_ldc_delay; 1775 do { 1776 size = *msglen; 1777 status = ldc_write(vdc->ldc_handle, pkt, &size); 1778 if (status == EWOULDBLOCK) { 1779 delay(delay_ticks); 1780 /* geometric backoff */ 1781 delay_ticks *= 2; 1782 if (delay_ticks > vdc_hz_max_ldc_delay) 1783 delay_ticks = vdc_hz_max_ldc_delay; 1784 } 1785 } while (status == EWOULDBLOCK); 1786 1787 /* if LDC had serious issues --- reset vdc state */ 1788 if (status == EIO || status == ECONNRESET) { 1789 /* LDC had serious issues --- reset vdc state */ 1790 mutex_enter(&vdc->read_lock); 1791 if ((vdc->read_state == VDC_READ_WAITING) || 1792 (vdc->read_state == VDC_READ_RESET)) 1793 cv_signal(&vdc->read_cv); 1794 vdc->read_state = VDC_READ_RESET; 1795 mutex_exit(&vdc->read_lock); 1796 1797 /* wake up any waiters in the reset thread */ 1798 if (vdc->state == VDC_STATE_INIT_WAITING) { 1799 DMSG(vdc, 0, "[%d] write reset - " 1800 "vdc is resetting ..\n", vdc->instance); 1801 vdc->state = VDC_STATE_RESETTING; 1802 cv_signal(&vdc->initwait_cv); 1803 } 1804 1805 return (ECONNRESET); 1806 } 1807 1808 /* return the last size written */ 1809 *msglen = size; 1810 1811 return (status); 1812 } 1813 1814 /* 1815 * Function: 1816 * vdc_get_ldc_id() 1817 * 1818 * Description: 1819 * 
 *	This function gets the 'ldc-id' for this particular instance of vdc.
 *	The id returned is the guest domain channel endpoint LDC uses for
 *	communication with vds.
 *
 * Arguments:
 *	dip	- dev info pointer for this instance of the device driver.
 *	ldc_id	- pointer to variable used to return the 'ldc-id' found.
 *
 * Return Code:
 *	0	- Success.
 *	ENOENT	- Expected node or property did not exist.
 *	ENXIO	- Unexpected error communicating with MD framework
 */
static int
vdc_get_ldc_id(dev_info_t *dip, uint64_t *ldc_id)
{
	int		status = ENOENT;
	char		*node_name = NULL;
	md_t		*mdp = NULL;
	int		num_nodes;
	int		num_vdevs;
	int		num_chans;
	mde_cookie_t	rootnode;
	mde_cookie_t	*listp = NULL;
	mde_cookie_t	*chanp = NULL;
	boolean_t	found_inst = B_FALSE;
	int		listsz;
	int		idx;
	uint64_t	md_inst;
	int		obp_inst;
	int		instance = ddi_get_instance(dip);

	ASSERT(ldc_id != NULL);
	*ldc_id = 0;

	/*
	 * Get the OBP instance number for comparison with the MD instance
	 *
	 * The "cfg-handle" property of a vdc node in an MD contains the MD's
	 * notion of "instance", or unique identifier, for that node; OBP
	 * stores the value of the "cfg-handle" MD property as the value of
	 * the "reg" property on the node in the device tree it builds from
	 * the MD and passes to Solaris.  Thus, we look up the devinfo node's
	 * "reg" property value to uniquely identify this device instance.
	 * If the "reg" property cannot be found, the device tree state is
	 * presumably so broken that there is no point in continuing.
	 */
	if (!ddi_prop_exists(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS, OBP_REG)) {
		cmn_err(CE_WARN, "'%s' property does not exist", OBP_REG);
		return (ENOENT);
	}
	obp_inst = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    OBP_REG, -1);
	DMSGX(1, "[%d] OBP inst=%d\n", instance, obp_inst);

	/*
	 * We now walk the MD nodes and if an instance of a vdc node matches
	 * the instance got from OBP we get the ldc-id property.
	 */
	if ((mdp = md_get_handle()) == NULL) {
		cmn_err(CE_WARN, "unable to init machine description");
		return (ENXIO);
	}

	num_nodes = md_node_count(mdp);
	ASSERT(num_nodes > 0);

	listsz = num_nodes * sizeof (mde_cookie_t);

	/* allocate memory for nodes */
	listp = kmem_zalloc(listsz, KM_SLEEP);
	chanp = kmem_zalloc(listsz, KM_SLEEP);

	rootnode = md_root_node(mdp);
	ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);

	/*
	 * Search for all the virtual devices, we will then check to see which
	 * ones are disk nodes.
	 */
	num_vdevs = md_scan_dag(mdp, rootnode,
	    md_find_name(mdp, VDC_MD_VDEV_NAME),
	    md_find_name(mdp, "fwd"), listp);

	if (num_vdevs <= 0) {
		cmn_err(CE_NOTE, "No '%s' node found", VDC_MD_VDEV_NAME);
		status = ENOENT;
		goto done;
	}

	DMSGX(1, "[%d] num_vdevs=%d\n", instance, num_vdevs);
	for (idx = 0; idx < num_vdevs; idx++) {
		status = md_get_prop_str(mdp, listp[idx], "name", &node_name);
		if ((status != 0) || (node_name == NULL)) {
			cmn_err(CE_NOTE, "Unable to get name of node type '%s'"
			    ": err %d", VDC_MD_VDEV_NAME, status);
			continue;
		}

		DMSGX(1, "[%d] Found node '%s'\n", instance, node_name);
		if (strcmp(VDC_MD_DISK_NAME, node_name) == 0) {
			status = md_get_prop_val(mdp, listp[idx],
			    VDC_MD_CFG_HDL, &md_inst);
			DMSGX(1, "[%d] vdc inst in MD=%lx\n",
			    instance, md_inst);
			if ((status == 0) && (md_inst == obp_inst)) {
				found_inst = B_TRUE;
				break;
			}
		}
	}

	if (!found_inst) {
		DMSGX(0, "Unable to find correct '%s' node", VDC_MD_DISK_NAME);
		status = ENOENT;
		goto done;
	}
	DMSGX(0, "[%d] MD inst=%lx\n", instance, md_inst);

	/* get the channels for this node */
	num_chans = md_scan_dag(mdp, listp[idx],
	    md_find_name(mdp, VDC_MD_CHAN_NAME),
	    md_find_name(mdp, "fwd"), chanp);

	/* expecting at least one channel */
	if (num_chans <= 0) {
		cmn_err(CE_NOTE, "No '%s' node for '%s' port",
		    VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME);
		status = ENOENT;
		goto done;

	} else if (num_chans != 1) {
		DMSGX(0, "[%d] Expected 1 '%s' node for '%s' port, found %d\n",
		    instance, VDC_MD_CHAN_NAME, VDC_MD_VDEV_NAME,
		    num_chans);
	}

	/*
	 * We use the first channel found (index 0), irrespective of how
	 * many are there in total.
	 */
	if (md_get_prop_val(mdp, chanp[0], VDC_ID_PROP, ldc_id) != 0) {
		cmn_err(CE_NOTE, "Channel '%s' property not found",
		    VDC_ID_PROP);
		status = ENOENT;
	}

	DMSGX(0, "[%d] LDC id is 0x%lx\n", instance, *ldc_id);

done:
	if (chanp)
		kmem_free(chanp, listsz);
	if (listp)
		kmem_free(listp, listsz);

	(void) md_fini_handle(mdp);

	return (status);
}

/*
 * vdc_do_ldc_up()
 *
 * Attempt to bring the LDC channel up.  ECONNREFUSED (the server end is
 * not listening yet) is not treated as an error -- the handshake will be
 * retried when the other end comes up.  If the channel is already UP,
 * the sequence numbers are (re)initialized.
 */
static int
vdc_do_ldc_up(vdc_t *vdc)
{
	int		status;
	ldc_status_t	ldc_state;

	DMSG(vdc, 0, "[%d] Bringing up channel %lx\n",
	    vdc->instance, vdc->ldc_id);

	if (vdc->lifecycle == VDC_LC_DETACHING)
		return (EINVAL);

	if ((status = ldc_up(vdc->ldc_handle)) != 0) {
		switch (status) {
		case ECONNREFUSED:	/* listener not ready at other end */
			DMSG(vdc, 0, "[%d] ldc_up(%lx,...) return %d\n",
			    vdc->instance, vdc->ldc_id, status);
			status = 0;
			break;
		default:
			DMSG(vdc, 0, "[%d] Failed to bring up LDC: "
			    "channel=%ld, err=%d", vdc->instance, vdc->ldc_id,
			    status);
			break;
		}
	}

	if (ldc_status(vdc->ldc_handle, &ldc_state) == 0) {
		vdc->ldc_state = ldc_state;
		if (ldc_state == LDC_UP) {
			DMSG(vdc, 0, "[%d] LDC channel already up\n",
			    vdc->instance);
			vdc->seq_num = 1;
			vdc->seq_num_reply = 0;
		}
	}

	return (status);
}

/*
 * Function:
 *	vdc_terminate_ldc()
 *
 * Description:
 *	Tear down the LDC channel resources (close, unregister callback,
 *	finalize) based on which VDC_LDC* bits are set in vdc->initialized,
 *	then clear those bits.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
2027 * 2028 * Return Code: 2029 * None 2030 */ 2031 static void 2032 vdc_terminate_ldc(vdc_t *vdc) 2033 { 2034 int instance = ddi_get_instance(vdc->dip); 2035 2036 ASSERT(vdc != NULL); 2037 ASSERT(mutex_owned(&vdc->lock)); 2038 2039 DMSG(vdc, 0, "[%d] initialized=%x\n", instance, vdc->initialized); 2040 2041 if (vdc->initialized & VDC_LDC_OPEN) { 2042 DMSG(vdc, 0, "[%d] ldc_close()\n", instance); 2043 (void) ldc_close(vdc->ldc_handle); 2044 } 2045 if (vdc->initialized & VDC_LDC_CB) { 2046 DMSG(vdc, 0, "[%d] ldc_unreg_callback()\n", instance); 2047 (void) ldc_unreg_callback(vdc->ldc_handle); 2048 } 2049 if (vdc->initialized & VDC_LDC) { 2050 DMSG(vdc, 0, "[%d] ldc_fini()\n", instance); 2051 (void) ldc_fini(vdc->ldc_handle); 2052 vdc->ldc_handle = NULL; 2053 } 2054 2055 vdc->initialized &= ~(VDC_LDC | VDC_LDC_CB | VDC_LDC_OPEN); 2056 } 2057 2058 /* -------------------------------------------------------------------------- */ 2059 2060 /* 2061 * Descriptor Ring helper routines 2062 */ 2063 2064 /* 2065 * Function: 2066 * vdc_init_descriptor_ring() 2067 * 2068 * Description: 2069 * 2070 * Arguments: 2071 * vdc - soft state pointer for this instance of the device driver. 2072 * 2073 * Return Code: 2074 * 0 - Success 2075 */ 2076 static int 2077 vdc_init_descriptor_ring(vdc_t *vdc) 2078 { 2079 vd_dring_entry_t *dep = NULL; /* DRing Entry pointer */ 2080 int status = 0; 2081 int i; 2082 2083 DMSG(vdc, 0, "[%d] initialized=%x\n", vdc->instance, vdc->initialized); 2084 2085 ASSERT(vdc != NULL); 2086 ASSERT(mutex_owned(&vdc->lock)); 2087 ASSERT(vdc->ldc_handle != NULL); 2088 2089 /* ensure we have enough room to store max sized block */ 2090 ASSERT(maxphys <= VD_MAX_BLOCK_SIZE); 2091 2092 if ((vdc->initialized & VDC_DRING_INIT) == 0) { 2093 DMSG(vdc, 0, "[%d] ldc_mem_dring_create\n", vdc->instance); 2094 /* 2095 * Calculate the maximum block size we can transmit using one 2096 * Descriptor Ring entry from the attributes returned by the 2097 * vDisk server. 
This is subject to a minimum of 'maxphys' 2098 * as we do not have the capability to split requests over 2099 * multiple DRing entries. 2100 */ 2101 if ((vdc->max_xfer_sz * vdc->block_size) < maxphys) { 2102 DMSG(vdc, 0, "[%d] using minimum DRing size\n", 2103 vdc->instance); 2104 vdc->dring_max_cookies = maxphys / PAGESIZE; 2105 } else { 2106 vdc->dring_max_cookies = 2107 (vdc->max_xfer_sz * vdc->block_size) / PAGESIZE; 2108 } 2109 vdc->dring_entry_size = (sizeof (vd_dring_entry_t) + 2110 (sizeof (ldc_mem_cookie_t) * 2111 (vdc->dring_max_cookies - 1))); 2112 vdc->dring_len = VD_DRING_LEN; 2113 2114 status = ldc_mem_dring_create(vdc->dring_len, 2115 vdc->dring_entry_size, &vdc->ldc_dring_hdl); 2116 if ((vdc->ldc_dring_hdl == NULL) || (status != 0)) { 2117 DMSG(vdc, 0, "[%d] Descriptor ring creation failed", 2118 vdc->instance); 2119 return (status); 2120 } 2121 vdc->initialized |= VDC_DRING_INIT; 2122 } 2123 2124 if ((vdc->initialized & VDC_DRING_BOUND) == 0) { 2125 DMSG(vdc, 0, "[%d] ldc_mem_dring_bind\n", vdc->instance); 2126 vdc->dring_cookie = 2127 kmem_zalloc(sizeof (ldc_mem_cookie_t), KM_SLEEP); 2128 2129 status = ldc_mem_dring_bind(vdc->ldc_handle, vdc->ldc_dring_hdl, 2130 LDC_SHADOW_MAP|LDC_DIRECT_MAP, LDC_MEM_RW, 2131 &vdc->dring_cookie[0], 2132 &vdc->dring_cookie_count); 2133 if (status != 0) { 2134 DMSG(vdc, 0, "[%d] Failed to bind descriptor ring " 2135 "(%lx) to channel (%lx) status=%d\n", 2136 vdc->instance, vdc->ldc_dring_hdl, 2137 vdc->ldc_handle, status); 2138 return (status); 2139 } 2140 ASSERT(vdc->dring_cookie_count == 1); 2141 vdc->initialized |= VDC_DRING_BOUND; 2142 } 2143 2144 status = ldc_mem_dring_info(vdc->ldc_dring_hdl, &vdc->dring_mem_info); 2145 if (status != 0) { 2146 DMSG(vdc, 0, 2147 "[%d] Failed to get info for descriptor ring (%lx)\n", 2148 vdc->instance, vdc->ldc_dring_hdl); 2149 return (status); 2150 } 2151 2152 if ((vdc->initialized & VDC_DRING_LOCAL) == 0) { 2153 DMSG(vdc, 0, "[%d] local dring\n", vdc->instance); 2154 2155 /* 
Allocate the local copy of this dring */ 2156 vdc->local_dring = 2157 kmem_zalloc(vdc->dring_len * sizeof (vdc_local_desc_t), 2158 KM_SLEEP); 2159 vdc->initialized |= VDC_DRING_LOCAL; 2160 } 2161 2162 /* 2163 * Mark all DRing entries as free and initialize the private 2164 * descriptor's memory handles. If any entry is initialized, 2165 * we need to free it later so we set the bit in 'initialized' 2166 * at the start. 2167 */ 2168 vdc->initialized |= VDC_DRING_ENTRY; 2169 for (i = 0; i < vdc->dring_len; i++) { 2170 dep = VDC_GET_DRING_ENTRY_PTR(vdc, i); 2171 dep->hdr.dstate = VIO_DESC_FREE; 2172 2173 status = ldc_mem_alloc_handle(vdc->ldc_handle, 2174 &vdc->local_dring[i].desc_mhdl); 2175 if (status != 0) { 2176 DMSG(vdc, 0, "![%d] Failed to alloc mem handle for" 2177 " descriptor %d", vdc->instance, i); 2178 return (status); 2179 } 2180 vdc->local_dring[i].is_free = B_TRUE; 2181 vdc->local_dring[i].dep = dep; 2182 } 2183 2184 /* Initialize the starting index */ 2185 vdc->dring_curr_idx = 0; 2186 2187 return (status); 2188 } 2189 2190 /* 2191 * Function: 2192 * vdc_destroy_descriptor_ring() 2193 * 2194 * Description: 2195 * 2196 * Arguments: 2197 * vdc - soft state pointer for this instance of the device driver. 
2198 * 2199 * Return Code: 2200 * None 2201 */ 2202 static void 2203 vdc_destroy_descriptor_ring(vdc_t *vdc) 2204 { 2205 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 2206 ldc_mem_handle_t mhdl = NULL; 2207 ldc_mem_info_t minfo; 2208 int status = -1; 2209 int i; /* loop */ 2210 2211 ASSERT(vdc != NULL); 2212 ASSERT(mutex_owned(&vdc->lock)); 2213 2214 DMSG(vdc, 0, "[%d] Entered\n", vdc->instance); 2215 2216 if (vdc->initialized & VDC_DRING_ENTRY) { 2217 DMSG(vdc, 0, 2218 "[%d] Removing Local DRing entries\n", vdc->instance); 2219 for (i = 0; i < vdc->dring_len; i++) { 2220 ldep = &vdc->local_dring[i]; 2221 mhdl = ldep->desc_mhdl; 2222 2223 if (mhdl == NULL) 2224 continue; 2225 2226 if ((status = ldc_mem_info(mhdl, &minfo)) != 0) { 2227 DMSG(vdc, 0, 2228 "ldc_mem_info returned an error: %d\n", 2229 status); 2230 2231 /* 2232 * This must mean that the mem handle 2233 * is not valid. Clear it out so that 2234 * no one tries to use it. 2235 */ 2236 ldep->desc_mhdl = NULL; 2237 continue; 2238 } 2239 2240 if (minfo.status == LDC_BOUND) { 2241 (void) ldc_mem_unbind_handle(mhdl); 2242 } 2243 2244 (void) ldc_mem_free_handle(mhdl); 2245 2246 ldep->desc_mhdl = NULL; 2247 } 2248 vdc->initialized &= ~VDC_DRING_ENTRY; 2249 } 2250 2251 if (vdc->initialized & VDC_DRING_LOCAL) { 2252 DMSG(vdc, 0, "[%d] Freeing Local DRing\n", vdc->instance); 2253 kmem_free(vdc->local_dring, 2254 vdc->dring_len * sizeof (vdc_local_desc_t)); 2255 vdc->initialized &= ~VDC_DRING_LOCAL; 2256 } 2257 2258 if (vdc->initialized & VDC_DRING_BOUND) { 2259 DMSG(vdc, 0, "[%d] Unbinding DRing\n", vdc->instance); 2260 status = ldc_mem_dring_unbind(vdc->ldc_dring_hdl); 2261 if (status == 0) { 2262 vdc->initialized &= ~VDC_DRING_BOUND; 2263 } else { 2264 DMSG(vdc, 0, "[%d] Error %d unbinding DRing %lx", 2265 vdc->instance, status, vdc->ldc_dring_hdl); 2266 } 2267 kmem_free(vdc->dring_cookie, sizeof (ldc_mem_cookie_t)); 2268 } 2269 2270 if (vdc->initialized & VDC_DRING_INIT) { 2271 DMSG(vdc, 0, "[%d] 
Destroying DRing\n", vdc->instance); 2272 status = ldc_mem_dring_destroy(vdc->ldc_dring_hdl); 2273 if (status == 0) { 2274 vdc->ldc_dring_hdl = NULL; 2275 bzero(&vdc->dring_mem_info, sizeof (ldc_mem_info_t)); 2276 vdc->initialized &= ~VDC_DRING_INIT; 2277 } else { 2278 DMSG(vdc, 0, "[%d] Error %d destroying DRing (%lx)", 2279 vdc->instance, status, vdc->ldc_dring_hdl); 2280 } 2281 } 2282 } 2283 2284 /* 2285 * Function: 2286 * vdc_map_to_shared_ring() 2287 * 2288 * Description: 2289 * Copy contents of the local descriptor to the shared 2290 * memory descriptor. 2291 * 2292 * Arguments: 2293 * vdcp - soft state pointer for this instance of the device driver. 2294 * idx - descriptor ring index 2295 * 2296 * Return Code: 2297 * None 2298 */ 2299 static int 2300 vdc_map_to_shared_dring(vdc_t *vdcp, int idx) 2301 { 2302 vdc_local_desc_t *ldep; 2303 vd_dring_entry_t *dep; 2304 int rv; 2305 2306 ldep = &(vdcp->local_dring[idx]); 2307 2308 /* for now leave in the old pop_mem_hdl stuff */ 2309 if (ldep->nbytes > 0) { 2310 rv = vdc_populate_mem_hdl(vdcp, ldep); 2311 if (rv) { 2312 DMSG(vdcp, 0, "[%d] Cannot populate mem handle\n", 2313 vdcp->instance); 2314 return (rv); 2315 } 2316 } 2317 2318 /* 2319 * fill in the data details into the DRing 2320 */ 2321 dep = ldep->dep; 2322 ASSERT(dep != NULL); 2323 2324 dep->payload.req_id = VDC_GET_NEXT_REQ_ID(vdcp); 2325 dep->payload.operation = ldep->operation; 2326 dep->payload.addr = ldep->offset; 2327 dep->payload.nbytes = ldep->nbytes; 2328 dep->payload.status = (uint32_t)-1; /* vds will set valid value */ 2329 dep->payload.slice = ldep->slice; 2330 dep->hdr.dstate = VIO_DESC_READY; 2331 dep->hdr.ack = 1; /* request an ACK for every message */ 2332 2333 return (0); 2334 } 2335 2336 /* 2337 * Function: 2338 * vdc_send_request 2339 * 2340 * Description: 2341 * This routine writes the data to be transmitted to vds into the 2342 * descriptor, notifies vds that the ring has been updated and 2343 * then waits for the request to be 
processed. 2344 * 2345 * Arguments: 2346 * vdcp - the soft state pointer 2347 * operation - operation we want vds to perform (VD_OP_XXX) 2348 * addr - address of data buf to be read/written. 2349 * nbytes - number of bytes to read/write 2350 * slice - the disk slice this request is for 2351 * offset - relative disk offset 2352 * cb_type - type of call - STRATEGY or SYNC 2353 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 2354 * . mode for ioctl(9e) 2355 * . LP64 diskaddr_t (block I/O) 2356 * dir - direction of operation (READ/WRITE/BOTH) 2357 * 2358 * Return Codes: 2359 * 0 2360 * EAGAIN 2361 * EFAULT 2362 * ENXIO 2363 * EIO 2364 */ 2365 static int 2366 vdc_send_request(vdc_t *vdcp, int operation, caddr_t addr, 2367 size_t nbytes, int slice, diskaddr_t offset, int cb_type, 2368 void *cb_arg, vio_desc_direction_t dir) 2369 { 2370 ASSERT(vdcp != NULL); 2371 ASSERT(slice < V_NUMPAR); 2372 2373 mutex_enter(&vdcp->lock); 2374 2375 do { 2376 while (vdcp->state != VDC_STATE_RUNNING) 2377 cv_wait(&vdcp->running_cv, &vdcp->lock); 2378 2379 } while (vdc_populate_descriptor(vdcp, operation, addr, 2380 nbytes, slice, offset, cb_type, cb_arg, dir)); 2381 2382 mutex_exit(&vdcp->lock); 2383 return (0); 2384 } 2385 2386 2387 /* 2388 * Function: 2389 * vdc_populate_descriptor 2390 * 2391 * Description: 2392 * This routine writes the data to be transmitted to vds into the 2393 * descriptor, notifies vds that the ring has been updated and 2394 * then waits for the request to be processed. 2395 * 2396 * Arguments: 2397 * vdcp - the soft state pointer 2398 * operation - operation we want vds to perform (VD_OP_XXX) 2399 * addr - address of data buf to be read/written. 2400 * nbytes - number of bytes to read/write 2401 * slice - the disk slice this request is for 2402 * offset - relative disk offset 2403 * cb_type - type of call - STRATEGY or SYNC 2404 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 2405 * . mode for ioctl(9e) 2406 * . 
LP64 diskaddr_t (block I/O) 2407 * dir - direction of operation (READ/WRITE/BOTH) 2408 * 2409 * Return Codes: 2410 * 0 2411 * EAGAIN 2412 * EFAULT 2413 * ENXIO 2414 * EIO 2415 */ 2416 static int 2417 vdc_populate_descriptor(vdc_t *vdcp, int operation, caddr_t addr, 2418 size_t nbytes, int slice, diskaddr_t offset, int cb_type, 2419 void *cb_arg, vio_desc_direction_t dir) 2420 { 2421 vdc_local_desc_t *local_dep = NULL; /* Local Dring Pointer */ 2422 int idx; /* Index of DRing entry used */ 2423 int next_idx; 2424 vio_dring_msg_t dmsg; 2425 size_t msglen; 2426 int rv; 2427 2428 ASSERT(MUTEX_HELD(&vdcp->lock)); 2429 vdcp->threads_pending++; 2430 loop: 2431 DMSG(vdcp, 2, ": dring_curr_idx = %d\n", vdcp->dring_curr_idx); 2432 2433 /* Get next available D-Ring entry */ 2434 idx = vdcp->dring_curr_idx; 2435 local_dep = &(vdcp->local_dring[idx]); 2436 2437 if (!local_dep->is_free) { 2438 DMSG(vdcp, 2, "[%d]: dring full - waiting for space\n", 2439 vdcp->instance); 2440 cv_wait(&vdcp->dring_free_cv, &vdcp->lock); 2441 if (vdcp->state == VDC_STATE_RUNNING || 2442 vdcp->state == VDC_STATE_HANDLE_PENDING) { 2443 goto loop; 2444 } 2445 vdcp->threads_pending--; 2446 return (ECONNRESET); 2447 } 2448 2449 next_idx = idx + 1; 2450 if (next_idx >= vdcp->dring_len) 2451 next_idx = 0; 2452 vdcp->dring_curr_idx = next_idx; 2453 2454 ASSERT(local_dep->is_free); 2455 2456 local_dep->operation = operation; 2457 local_dep->addr = addr; 2458 local_dep->nbytes = nbytes; 2459 local_dep->slice = slice; 2460 local_dep->offset = offset; 2461 local_dep->cb_type = cb_type; 2462 local_dep->cb_arg = cb_arg; 2463 local_dep->dir = dir; 2464 2465 local_dep->is_free = B_FALSE; 2466 2467 rv = vdc_map_to_shared_dring(vdcp, idx); 2468 if (rv) { 2469 DMSG(vdcp, 0, "[%d]: cannot bind memory - waiting ..\n", 2470 vdcp->instance); 2471 /* free the descriptor */ 2472 local_dep->is_free = B_TRUE; 2473 vdcp->dring_curr_idx = idx; 2474 cv_wait(&vdcp->membind_cv, &vdcp->lock); 2475 if (vdcp->state == 
VDC_STATE_RUNNING || 2476 vdcp->state == VDC_STATE_HANDLE_PENDING) { 2477 goto loop; 2478 } 2479 vdcp->threads_pending--; 2480 return (ECONNRESET); 2481 } 2482 2483 /* 2484 * Send a msg with the DRing details to vds 2485 */ 2486 VIO_INIT_DRING_DATA_TAG(dmsg); 2487 VDC_INIT_DRING_DATA_MSG_IDS(dmsg, vdcp); 2488 dmsg.dring_ident = vdcp->dring_ident; 2489 dmsg.start_idx = idx; 2490 dmsg.end_idx = idx; 2491 vdcp->seq_num++; 2492 2493 DTRACE_IO2(send, vio_dring_msg_t *, &dmsg, vdc_t *, vdcp); 2494 2495 DMSG(vdcp, 2, "ident=0x%lx, st=%u, end=%u, seq=%ld\n", 2496 vdcp->dring_ident, dmsg.start_idx, dmsg.end_idx, dmsg.seq_num); 2497 2498 /* 2499 * note we're still holding the lock here to 2500 * make sure the message goes out in order !!!... 2501 */ 2502 msglen = sizeof (dmsg); 2503 rv = vdc_send(vdcp, (caddr_t)&dmsg, &msglen); 2504 switch (rv) { 2505 case ECONNRESET: 2506 /* 2507 * vdc_send initiates the reset on failure. 2508 * Since the transaction has already been put 2509 * on the local dring, it will automatically get 2510 * retried when the channel is reset. Given that, 2511 * it is ok to just return success even though the 2512 * send failed. 2513 */ 2514 rv = 0; 2515 break; 2516 2517 case 0: /* EOK */ 2518 DMSG(vdcp, 1, "sent via LDC: rv=%d\n", rv); 2519 break; 2520 2521 default: 2522 goto cleanup_and_exit; 2523 } 2524 2525 vdcp->threads_pending--; 2526 return (rv); 2527 2528 cleanup_and_exit: 2529 DMSG(vdcp, 0, "unexpected error, rv=%d\n", rv); 2530 return (ENXIO); 2531 } 2532 2533 /* 2534 * Function: 2535 * vdc_do_sync_op 2536 * 2537 * Description: 2538 * Wrapper around vdc_populate_descriptor that blocks until the 2539 * response to the message is available. 2540 * 2541 * Arguments: 2542 * vdcp - the soft state pointer 2543 * operation - operation we want vds to perform (VD_OP_XXX) 2544 * addr - address of data buf to be read/written. 
2545 * nbytes - number of bytes to read/write 2546 * slice - the disk slice this request is for 2547 * offset - relative disk offset 2548 * cb_type - type of call - STRATEGY or SYNC 2549 * cb_arg - parameter to be sent to server (depends on VD_OP_XXX type) 2550 * . mode for ioctl(9e) 2551 * . LP64 diskaddr_t (block I/O) 2552 * dir - direction of operation (READ/WRITE/BOTH) 2553 * 2554 * Return Codes: 2555 * 0 2556 * EAGAIN 2557 * EFAULT 2558 * ENXIO 2559 * EIO 2560 */ 2561 static int 2562 vdc_do_sync_op(vdc_t *vdcp, int operation, caddr_t addr, size_t nbytes, 2563 int slice, diskaddr_t offset, int cb_type, void *cb_arg, 2564 vio_desc_direction_t dir) 2565 { 2566 int status; 2567 2568 ASSERT(cb_type == CB_SYNC); 2569 2570 /* 2571 * Grab the lock, if blocked wait until the server 2572 * response causes us to wake up again. 2573 */ 2574 mutex_enter(&vdcp->lock); 2575 vdcp->sync_op_cnt++; 2576 while (vdcp->sync_op_blocked && vdcp->state != VDC_STATE_DETACH) 2577 cv_wait(&vdcp->sync_blocked_cv, &vdcp->lock); 2578 2579 if (vdcp->state == VDC_STATE_DETACH) { 2580 cv_broadcast(&vdcp->sync_blocked_cv); 2581 vdcp->sync_op_cnt--; 2582 mutex_exit(&vdcp->lock); 2583 return (ENXIO); 2584 } 2585 2586 /* now block anyone other thread entering after us */ 2587 vdcp->sync_op_blocked = B_TRUE; 2588 vdcp->sync_op_pending = B_TRUE; 2589 mutex_exit(&vdcp->lock); 2590 2591 /* 2592 * No need to check return value - will return error only 2593 * in the DETACH case and we can fall through 2594 */ 2595 (void) vdc_send_request(vdcp, operation, addr, 2596 nbytes, slice, offset, cb_type, cb_arg, dir); 2597 2598 /* 2599 * block until our transaction completes. 2600 * Also anyone else waiting also gets to go next. 
2601 */ 2602 mutex_enter(&vdcp->lock); 2603 while (vdcp->sync_op_pending && vdcp->state != VDC_STATE_DETACH) 2604 cv_wait(&vdcp->sync_pending_cv, &vdcp->lock); 2605 2606 DMSG(vdcp, 2, ": operation returned %d\n", vdcp->sync_op_status); 2607 if (vdcp->state == VDC_STATE_DETACH) 2608 status = ENXIO; 2609 else 2610 status = vdcp->sync_op_status; 2611 vdcp->sync_op_status = 0; 2612 vdcp->sync_op_blocked = B_FALSE; 2613 vdcp->sync_op_cnt--; 2614 2615 /* signal the next waiting thread */ 2616 cv_signal(&vdcp->sync_blocked_cv); 2617 mutex_exit(&vdcp->lock); 2618 2619 return (status); 2620 } 2621 2622 2623 /* 2624 * Function: 2625 * vdc_drain_response() 2626 * 2627 * Description: 2628 * When a guest is panicking, the completion of requests needs to be 2629 * handled differently because interrupts are disabled and vdc 2630 * will not get messages. We have to poll for the messages instead. 2631 * 2632 * Arguments: 2633 * vdc - soft state pointer for this instance of the device driver. 2634 * 2635 * Return Code: 2636 * 0 - Success 2637 */ 2638 static int 2639 vdc_drain_response(vdc_t *vdc) 2640 { 2641 int rv, idx, retries; 2642 size_t msglen; 2643 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 2644 vio_dring_msg_t dmsg; 2645 2646 mutex_enter(&vdc->lock); 2647 2648 retries = 0; 2649 for (;;) { 2650 msglen = sizeof (dmsg); 2651 rv = ldc_read(vdc->ldc_handle, (caddr_t)&dmsg, &msglen); 2652 if (rv) { 2653 rv = EINVAL; 2654 break; 2655 } 2656 2657 /* 2658 * if there are no packets wait and check again 2659 */ 2660 if ((rv == 0) && (msglen == 0)) { 2661 if (retries++ > vdc_dump_retries) { 2662 rv = EAGAIN; 2663 break; 2664 } 2665 2666 drv_usecwait(vdc_usec_timeout_dump); 2667 continue; 2668 } 2669 2670 /* 2671 * Ignore all messages that are not ACKs/NACKs to 2672 * DRing requests. 
2673 */ 2674 if ((dmsg.tag.vio_msgtype != VIO_TYPE_DATA) || 2675 (dmsg.tag.vio_subtype_env != VIO_DRING_DATA)) { 2676 DMSG(vdc, 0, "discard pkt: type=%d sub=%d env=%d\n", 2677 dmsg.tag.vio_msgtype, 2678 dmsg.tag.vio_subtype, 2679 dmsg.tag.vio_subtype_env); 2680 continue; 2681 } 2682 2683 /* 2684 * set the appropriate return value for the current request. 2685 */ 2686 switch (dmsg.tag.vio_subtype) { 2687 case VIO_SUBTYPE_ACK: 2688 rv = 0; 2689 break; 2690 case VIO_SUBTYPE_NACK: 2691 rv = EAGAIN; 2692 break; 2693 default: 2694 continue; 2695 } 2696 2697 idx = dmsg.start_idx; 2698 if (idx >= vdc->dring_len) { 2699 DMSG(vdc, 0, "[%d] Bogus ack data : start %d\n", 2700 vdc->instance, idx); 2701 continue; 2702 } 2703 ldep = &vdc->local_dring[idx]; 2704 if (ldep->dep->hdr.dstate != VIO_DESC_DONE) { 2705 DMSG(vdc, 0, "[%d] Entry @ %d - state !DONE %d\n", 2706 vdc->instance, idx, ldep->dep->hdr.dstate); 2707 continue; 2708 } 2709 2710 DMSG(vdc, 1, "[%d] Depopulating idx=%d state=%d\n", 2711 vdc->instance, idx, ldep->dep->hdr.dstate); 2712 rv = vdc_depopulate_descriptor(vdc, idx); 2713 if (rv) { 2714 DMSG(vdc, 0, 2715 "[%d] Entry @ %d - depopulate failed ..\n", 2716 vdc->instance, idx); 2717 } 2718 2719 /* if this is the last descriptor - break out of loop */ 2720 if ((idx + 1) % vdc->dring_len == vdc->dring_curr_idx) 2721 break; 2722 } 2723 2724 mutex_exit(&vdc->lock); 2725 DMSG(vdc, 0, "End idx=%d\n", idx); 2726 2727 return (rv); 2728 } 2729 2730 2731 /* 2732 * Function: 2733 * vdc_depopulate_descriptor() 2734 * 2735 * Description: 2736 * 2737 * Arguments: 2738 * vdc - soft state pointer for this instance of the device driver. 
2739 * idx - Index of the Descriptor Ring entry being modified 2740 * 2741 * Return Code: 2742 * 0 - Success 2743 */ 2744 static int 2745 vdc_depopulate_descriptor(vdc_t *vdc, uint_t idx) 2746 { 2747 vd_dring_entry_t *dep = NULL; /* Dring Entry Pointer */ 2748 vdc_local_desc_t *ldep = NULL; /* Local Dring Entry Pointer */ 2749 int status = ENXIO; 2750 int operation; 2751 int rv = 0; 2752 2753 ASSERT(vdc != NULL); 2754 ASSERT(idx < vdc->dring_len); 2755 ldep = &vdc->local_dring[idx]; 2756 ASSERT(ldep != NULL); 2757 ASSERT(MUTEX_HELD(&vdc->lock)); 2758 2759 DMSG(vdc, 2, ": idx = %d\n", idx); 2760 dep = ldep->dep; 2761 ASSERT(dep != NULL); 2762 ASSERT((dep->hdr.dstate == VIO_DESC_DONE) || 2763 (dep->payload.status == ECANCELED)); 2764 2765 VDC_MARK_DRING_ENTRY_FREE(vdc, idx); 2766 2767 ldep->is_free = B_TRUE; 2768 DMSG(vdc, 2, ": is_free = %d\n", ldep->is_free); 2769 status = dep->payload.status; 2770 operation = dep->payload.operation; 2771 2772 /* the DKIO FLUSH operation never bind handles so we can return now */ 2773 if (operation == VD_OP_FLUSH) 2774 return (status); 2775 2776 /* 2777 * If the upper layer passed in a misaligned address we copied the 2778 * data into an aligned buffer before sending it to LDC - we now 2779 * copy it back to the original buffer. 2780 */ 2781 if (ldep->align_addr) { 2782 ASSERT(ldep->addr != NULL); 2783 ASSERT(dep->payload.nbytes > 0); 2784 2785 bcopy(ldep->align_addr, ldep->addr, dep->payload.nbytes); 2786 kmem_free(ldep->align_addr, 2787 sizeof (caddr_t) * P2ROUNDUP(dep->payload.nbytes, 8)); 2788 ldep->align_addr = NULL; 2789 } 2790 2791 rv = ldc_mem_unbind_handle(ldep->desc_mhdl); 2792 if (rv != 0) { 2793 DMSG(vdc, 0, "?[%d] unbind mhdl 0x%lx @ idx %d failed (%d)", 2794 vdc->instance, ldep->desc_mhdl, idx, rv); 2795 /* 2796 * The error returned by the vDisk server is more informative 2797 * and thus has a higher priority but if it isn't set we ensure 2798 * that this function returns an error. 
2799 */ 2800 if (status == 0) 2801 status = EINVAL; 2802 } 2803 2804 cv_signal(&vdc->membind_cv); 2805 cv_signal(&vdc->dring_free_cv); 2806 2807 return (status); 2808 } 2809 2810 /* 2811 * Function: 2812 * vdc_populate_mem_hdl() 2813 * 2814 * Description: 2815 * 2816 * Arguments: 2817 * vdc - soft state pointer for this instance of the device driver. 2818 * idx - Index of the Descriptor Ring entry being modified 2819 * addr - virtual address being mapped in 2820 * nybtes - number of bytes in 'addr' 2821 * operation - the vDisk operation being performed (VD_OP_xxx) 2822 * 2823 * Return Code: 2824 * 0 - Success 2825 */ 2826 static int 2827 vdc_populate_mem_hdl(vdc_t *vdcp, vdc_local_desc_t *ldep) 2828 { 2829 vd_dring_entry_t *dep = NULL; 2830 ldc_mem_handle_t mhdl; 2831 caddr_t vaddr; 2832 size_t nbytes; 2833 uint8_t perm = LDC_MEM_RW; 2834 uint8_t maptype; 2835 int rv = 0; 2836 int i; 2837 2838 ASSERT(vdcp != NULL); 2839 2840 dep = ldep->dep; 2841 mhdl = ldep->desc_mhdl; 2842 2843 switch (ldep->dir) { 2844 case VIO_read_dir: 2845 perm = LDC_MEM_W; 2846 break; 2847 2848 case VIO_write_dir: 2849 perm = LDC_MEM_R; 2850 break; 2851 2852 case VIO_both_dir: 2853 perm = LDC_MEM_RW; 2854 break; 2855 2856 default: 2857 ASSERT(0); /* catch bad programming in vdc */ 2858 } 2859 2860 /* 2861 * LDC expects any addresses passed in to be 8-byte aligned. 
We need 2862 * to copy the contents of any misaligned buffers to a newly allocated 2863 * buffer and bind it instead (and copy the the contents back to the 2864 * original buffer passed in when depopulating the descriptor) 2865 */ 2866 vaddr = ldep->addr; 2867 nbytes = ldep->nbytes; 2868 if (((uint64_t)vaddr & 0x7) != 0) { 2869 ASSERT(ldep->align_addr == NULL); 2870 ldep->align_addr = 2871 kmem_alloc(sizeof (caddr_t) * 2872 P2ROUNDUP(nbytes, 8), KM_SLEEP); 2873 DMSG(vdcp, 0, "[%d] Misaligned address %p reallocating " 2874 "(buf=%p nb=%ld op=%d)\n", 2875 vdcp->instance, (void *)vaddr, (void *)ldep->align_addr, 2876 nbytes, ldep->operation); 2877 if (perm != LDC_MEM_W) 2878 bcopy(vaddr, ldep->align_addr, nbytes); 2879 vaddr = ldep->align_addr; 2880 } 2881 2882 maptype = LDC_IO_MAP|LDC_SHADOW_MAP|LDC_DIRECT_MAP; 2883 rv = ldc_mem_bind_handle(mhdl, vaddr, P2ROUNDUP(nbytes, 8), 2884 maptype, perm, &dep->payload.cookie[0], 2885 &dep->payload.ncookies); 2886 DMSG(vdcp, 2, "[%d] bound mem handle; ncookies=%d\n", 2887 vdcp->instance, dep->payload.ncookies); 2888 if (rv != 0) { 2889 DMSG(vdcp, 0, "[%d] Failed to bind LDC memory handle " 2890 "(mhdl=%p, buf=%p, err=%d)\n", 2891 vdcp->instance, (void *)mhdl, (void *)vaddr, rv); 2892 if (ldep->align_addr) { 2893 kmem_free(ldep->align_addr, 2894 sizeof (caddr_t) * P2ROUNDUP(nbytes, 8)); 2895 ldep->align_addr = NULL; 2896 } 2897 return (EAGAIN); 2898 } 2899 2900 /* 2901 * Get the other cookies (if any). 
2902 */ 2903 for (i = 1; i < dep->payload.ncookies; i++) { 2904 rv = ldc_mem_nextcookie(mhdl, &dep->payload.cookie[i]); 2905 if (rv != 0) { 2906 (void) ldc_mem_unbind_handle(mhdl); 2907 DMSG(vdcp, 0, "?[%d] Failed to get next cookie " 2908 "(mhdl=%lx cnum=%d), err=%d", 2909 vdcp->instance, mhdl, i, rv); 2910 if (ldep->align_addr) { 2911 kmem_free(ldep->align_addr, 2912 sizeof (caddr_t) * dep->payload.nbytes); 2913 ldep->align_addr = NULL; 2914 } 2915 return (EAGAIN); 2916 } 2917 } 2918 2919 return (rv); 2920 } 2921 2922 /* 2923 * Interrupt handlers for messages from LDC 2924 */ 2925 2926 /* 2927 * Function: 2928 * vdc_handle_cb() 2929 * 2930 * Description: 2931 * 2932 * Arguments: 2933 * event - Type of event (LDC_EVT_xxx) that triggered the callback 2934 * arg - soft state pointer for this instance of the device driver. 2935 * 2936 * Return Code: 2937 * 0 - Success 2938 */ 2939 static uint_t 2940 vdc_handle_cb(uint64_t event, caddr_t arg) 2941 { 2942 ldc_status_t ldc_state; 2943 int rv = 0; 2944 2945 vdc_t *vdc = (vdc_t *)(void *)arg; 2946 2947 ASSERT(vdc != NULL); 2948 2949 DMSG(vdc, 1, "evt=%lx seqID=%ld\n", event, vdc->seq_num); 2950 2951 /* 2952 * Depending on the type of event that triggered this callback, 2953 * we modify the handshake state or read the data. 2954 * 2955 * NOTE: not done as a switch() as event could be triggered by 2956 * a state change and a read request. Also the ordering of the 2957 * check for the event types is deliberate. 2958 */ 2959 if (event & LDC_EVT_UP) { 2960 DMSG(vdc, 0, "[%d] Received LDC_EVT_UP\n", vdc->instance); 2961 2962 mutex_enter(&vdc->lock); 2963 2964 /* get LDC state */ 2965 rv = ldc_status(vdc->ldc_handle, &ldc_state); 2966 if (rv != 0) { 2967 DMSG(vdc, 0, "[%d] Couldn't get LDC status %d", 2968 vdc->instance, rv); 2969 return (LDC_SUCCESS); 2970 } 2971 if (vdc->ldc_state != LDC_UP && ldc_state == LDC_UP) { 2972 /* 2973 * Reset the transaction sequence numbers when 2974 * LDC comes up. 
We then kick off the handshake 2975 * negotiation with the vDisk server. 2976 */ 2977 vdc->seq_num = 1; 2978 vdc->seq_num_reply = 0; 2979 vdc->ldc_state = ldc_state; 2980 cv_signal(&vdc->initwait_cv); 2981 } 2982 2983 mutex_exit(&vdc->lock); 2984 } 2985 2986 if (event & LDC_EVT_READ) { 2987 DMSG(vdc, 0, "[%d] Received LDC_EVT_READ\n", vdc->instance); 2988 mutex_enter(&vdc->read_lock); 2989 cv_signal(&vdc->read_cv); 2990 vdc->read_state = VDC_READ_PENDING; 2991 mutex_exit(&vdc->read_lock); 2992 2993 /* that's all we have to do - no need to handle DOWN/RESET */ 2994 return (LDC_SUCCESS); 2995 } 2996 2997 if (event & (LDC_EVT_RESET|LDC_EVT_DOWN)) { 2998 2999 DMSG(vdc, 0, "[%d] Received LDC RESET event\n", vdc->instance); 3000 3001 mutex_enter(&vdc->lock); 3002 /* 3003 * Need to wake up any readers so they will 3004 * detect that a reset has occurred. 3005 */ 3006 mutex_enter(&vdc->read_lock); 3007 if ((vdc->read_state == VDC_READ_WAITING) || 3008 (vdc->read_state == VDC_READ_RESET)) 3009 cv_signal(&vdc->read_cv); 3010 vdc->read_state = VDC_READ_RESET; 3011 mutex_exit(&vdc->read_lock); 3012 3013 /* wake up any threads waiting for connection to come up */ 3014 if (vdc->state == VDC_STATE_INIT_WAITING) { 3015 vdc->state = VDC_STATE_RESETTING; 3016 cv_signal(&vdc->initwait_cv); 3017 } 3018 3019 mutex_exit(&vdc->lock); 3020 } 3021 3022 if (event & ~(LDC_EVT_UP | LDC_EVT_RESET | LDC_EVT_DOWN | LDC_EVT_READ)) 3023 DMSG(vdc, 0, "![%d] Unexpected LDC event (%lx) received", 3024 vdc->instance, event); 3025 3026 return (LDC_SUCCESS); 3027 } 3028 3029 /* 3030 * Function: 3031 * vdc_wait_for_response() 3032 * 3033 * Description: 3034 * Block waiting for a response from the server. If there is 3035 * no data the thread block on the read_cv that is signalled 3036 * by the callback when an EVT_READ occurs. 3037 * 3038 * Arguments: 3039 * vdcp - soft state pointer for this instance of the device driver. 
3040 * 3041 * Return Code: 3042 * 0 - Success 3043 */ 3044 static int 3045 vdc_wait_for_response(vdc_t *vdcp, vio_msg_t *msgp) 3046 { 3047 size_t nbytes = sizeof (*msgp); 3048 int status; 3049 3050 ASSERT(vdcp != NULL); 3051 3052 DMSG(vdcp, 1, "[%d] Entered\n", vdcp->instance); 3053 3054 status = vdc_recv(vdcp, msgp, &nbytes); 3055 DMSG(vdcp, 3, "vdc_read() done.. status=0x%x size=0x%x\n", 3056 status, (int)nbytes); 3057 if (status) { 3058 DMSG(vdcp, 0, "?[%d] Error %d reading LDC msg\n", 3059 vdcp->instance, status); 3060 return (status); 3061 } 3062 3063 if (nbytes < sizeof (vio_msg_tag_t)) { 3064 DMSG(vdcp, 0, "?[%d] Expect %lu bytes; recv'd %lu\n", 3065 vdcp->instance, sizeof (vio_msg_tag_t), nbytes); 3066 return (ENOMSG); 3067 } 3068 3069 DMSG(vdcp, 2, "[%d] (%x/%x/%x)\n", vdcp->instance, 3070 msgp->tag.vio_msgtype, 3071 msgp->tag.vio_subtype, 3072 msgp->tag.vio_subtype_env); 3073 3074 /* 3075 * Verify the Session ID of the message 3076 * 3077 * Every message after the Version has been negotiated should 3078 * have the correct session ID set. 3079 */ 3080 if ((msgp->tag.vio_sid != vdcp->session_id) && 3081 (msgp->tag.vio_subtype_env != VIO_VER_INFO)) { 3082 DMSG(vdcp, 0, "[%d] Invalid SID: received 0x%x, " 3083 "expected 0x%lx [seq num %lx @ %d]", 3084 vdcp->instance, msgp->tag.vio_sid, 3085 vdcp->session_id, 3086 ((vio_dring_msg_t *)msgp)->seq_num, 3087 ((vio_dring_msg_t *)msgp)->start_idx); 3088 return (ENOMSG); 3089 } 3090 return (0); 3091 } 3092 3093 3094 /* 3095 * Function: 3096 * vdc_resubmit_backup_dring() 3097 * 3098 * Description: 3099 * Resubmit each descriptor in the backed up dring to 3100 * vDisk server. The Dring was backed up during connection 3101 * reset. 3102 * 3103 * Arguments: 3104 * vdcp - soft state pointer for this instance of the device driver. 
3105 * 3106 * Return Code: 3107 * 0 - Success 3108 */ 3109 static int 3110 vdc_resubmit_backup_dring(vdc_t *vdcp) 3111 { 3112 int count; 3113 int b_idx; 3114 int rv; 3115 int dring_size; 3116 int status; 3117 vio_msg_t vio_msg; 3118 vdc_local_desc_t *curr_ldep; 3119 3120 ASSERT(MUTEX_NOT_HELD(&vdcp->lock)); 3121 ASSERT(vdcp->state == VDC_STATE_HANDLE_PENDING); 3122 3123 DMSG(vdcp, 1, "restoring pending dring entries (len=%d, tail=%d)\n", 3124 vdcp->local_dring_backup_len, vdcp->local_dring_backup_tail); 3125 3126 /* 3127 * Walk the backup copy of the local descriptor ring and 3128 * resubmit all the outstanding transactions. 3129 */ 3130 b_idx = vdcp->local_dring_backup_tail; 3131 for (count = 0; count < vdcp->local_dring_backup_len; count++) { 3132 3133 curr_ldep = &(vdcp->local_dring_backup[b_idx]); 3134 3135 /* only resubmit oustanding transactions */ 3136 if (!curr_ldep->is_free) { 3137 3138 DMSG(vdcp, 1, "resubmitting entry idx=%x\n", b_idx); 3139 mutex_enter(&vdcp->lock); 3140 rv = vdc_populate_descriptor(vdcp, curr_ldep->operation, 3141 curr_ldep->addr, curr_ldep->nbytes, 3142 curr_ldep->slice, curr_ldep->offset, 3143 curr_ldep->cb_type, curr_ldep->cb_arg, 3144 curr_ldep->dir); 3145 mutex_exit(&vdcp->lock); 3146 if (rv) { 3147 DMSG(vdcp, 1, "[%d] cannot resubmit entry %d\n", 3148 vdcp->instance, b_idx); 3149 return (rv); 3150 } 3151 3152 /* Wait for the response message. 
		 */
			DMSG(vdcp, 1, "waiting for response to idx=%x\n",
			    b_idx);
			status = vdc_wait_for_response(vdcp, &vio_msg);
			if (status) {
				DMSG(vdcp, 1, "[%d] wait_for_response "
				    "returned err=%d\n", vdcp->instance,
				    status);
				return (status);
			}

			DMSG(vdcp, 1, "processing msg for idx=%x\n", b_idx);
			status = vdc_process_data_msg(vdcp, &vio_msg);
			if (status) {
				DMSG(vdcp, 1, "[%d] process_data_msg "
				    "returned err=%d\n", vdcp->instance,
				    status);
				return (status);
			}
		}

		/* get the next element to submit */
		if (++b_idx >= vdcp->local_dring_backup_len)
			b_idx = 0;
	}

	/* all done - now clear up pending dring copy */
	dring_size = vdcp->local_dring_backup_len *
	    sizeof (vdcp->local_dring_backup[0]);

	(void) kmem_free(vdcp->local_dring_backup, dring_size);

	vdcp->local_dring_backup = NULL;

	return (0);
}

/*
 * Function:
 *	vdc_backup_local_dring()
 *
 * Description:
 *	Backup the current dring in the event of a reset. The Dring
 *	transactions will be resubmitted to the server when the
 *	connection is restored.
 *
 * Arguments:
 *	vdcp	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	NONE
 */
static void
vdc_backup_local_dring(vdc_t *vdcp)
{
	int dring_size;

	ASSERT(vdcp->state == VDC_STATE_RESETTING);

	/*
	 * If the backup dring is still around, it means
	 * that the last restore did not complete. However,
	 * since we never got back into the running state,
	 * the backup copy we have is still valid.
	 */
	if (vdcp->local_dring_backup != NULL) {
		DMSG(vdcp, 1, "reusing local descriptor ring backup "
		    "(len=%d, tail=%d)\n", vdcp->local_dring_backup_len,
		    vdcp->local_dring_backup_tail);
		return;
	}

	DMSG(vdcp, 1, "backing up the local descriptor ring (len=%d, "
	    "tail=%d)\n", vdcp->dring_len, vdcp->dring_curr_idx);

	dring_size = vdcp->dring_len * sizeof (vdcp->local_dring[0]);

	vdcp->local_dring_backup = kmem_alloc(dring_size, KM_SLEEP);
	bcopy(vdcp->local_dring, vdcp->local_dring_backup, dring_size);

	vdcp->local_dring_backup_tail = vdcp->dring_curr_idx;
	vdcp->local_dring_backup_len = vdcp->dring_len;
}

/* -------------------------------------------------------------------------- */

/*
 * The following functions process the incoming messages from vds
 */

/*
 * Function:
 *	vdc_process_msg_thread()
 *
 * Description:
 *
 *	Main VDC message processing thread. Each vDisk instance
 *	consists of a copy of this thread. This thread triggers
 *	all the handshakes and data exchange with the server. It
 *	also handles all channel resets
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *
 * Return Code:
 *	None
 */
static void
vdc_process_msg_thread(vdc_t *vdcp)
{
	int status;

	mutex_enter(&vdcp->lock);

	for (;;) {

	/* Q() expands to the state's name for the debug message below */
#define	Q(_s)	(vdcp->state == _s) ? #_s :
		DMSG(vdcp, 3, "state = %d (%s)\n", vdcp->state,
		    Q(VDC_STATE_INIT)
		    Q(VDC_STATE_INIT_WAITING)
		    Q(VDC_STATE_NEGOTIATE)
		    Q(VDC_STATE_HANDLE_PENDING)
		    Q(VDC_STATE_RUNNING)
		    Q(VDC_STATE_RESETTING)
		    Q(VDC_STATE_DETACH)
		    "UNKNOWN");

		switch (vdcp->state) {
		case VDC_STATE_INIT:

			/* Check if have re-initializing repeatedly */
			if (vdcp->hshake_cnt++ > VDC_RETRIES) {
				vdcp->state = VDC_STATE_DETACH;
				break;
			}

			/* Bring up connection with vds via LDC */
			status = vdc_start_ldc_connection(vdcp);
			switch (status) {
			case EINVAL:
				DMSG(vdcp, 0, "[%d] Could not start LDC",
				    vdcp->instance);
				vdcp->state = VDC_STATE_DETACH;
				break;
			case 0:
				vdcp->state = VDC_STATE_INIT_WAITING;
				break;
			default:
				/*
				 * Transient failure; wait for the channel
				 * callback to move things along.
				 */
				vdcp->state = VDC_STATE_INIT_WAITING;
				break;
			}
			break;

		case VDC_STATE_INIT_WAITING:

			/*
			 * Let the callback event move us on
			 * when channel is open to server
			 */
			while (vdcp->ldc_state != LDC_UP) {
				cv_wait(&vdcp->initwait_cv, &vdcp->lock);
				if (vdcp->state != VDC_STATE_INIT_WAITING) {
					DMSG(vdcp, 0,
				"state moved to %d out from under us...\n",
					    vdcp->state);

					break;
				}
			}
			if (vdcp->state == VDC_STATE_INIT_WAITING &&
			    vdcp->ldc_state == LDC_UP) {
				vdcp->state = VDC_STATE_NEGOTIATE;
			}
			break;

		case VDC_STATE_NEGOTIATE:
			/*
			 * Handshake: version, then attributes, then dring
			 * registration, then RDX.  Any failure resets the
			 * channel and restarts from VDC_STATE_INIT.
			 */
			switch (status = vdc_ver_negotiation(vdcp)) {
			case 0:
				break;
			default:
				DMSG(vdcp, 0, "ver negotiate failed (%d)..\n",
				    status);
				goto reset;
			}

			switch (status = vdc_attr_negotiation(vdcp)) {
			case 0:
				break;
			default:
				DMSG(vdcp, 0, "attr negotiate failed (%d)..\n",
				    status);
				goto reset;
			}

			switch (status = vdc_dring_negotiation(vdcp)) {
			case 0:
				break;
			default:
				DMSG(vdcp, 0, "dring negotiate failed (%d)..\n",
				    status);
				goto reset;
			}

			switch (status = vdc_rdx_exchange(vdcp)) {
			case 0:
				vdcp->state = VDC_STATE_HANDLE_PENDING;
				goto done;
			default:
				DMSG(vdcp, 0, "RDX xchg failed ..(%d)\n",
				    status);
				goto reset;
			}
reset:
			DMSG(vdcp, 0, "negotiation failed: resetting (%d)\n",
			    status);
			vdcp->state = VDC_STATE_RESETTING;
done:
			DMSG(vdcp, 0, "negotiation complete (state=0x%x)...\n",
			    vdcp->state);
			break;

		case VDC_STATE_HANDLE_PENDING:

			/* resubmit I/O backed up across the reset */
			mutex_exit(&vdcp->lock);
			status = vdc_resubmit_backup_dring(vdcp);
			mutex_enter(&vdcp->lock);

			if (status)
				vdcp->state = VDC_STATE_RESETTING;
			else
				vdcp->state = VDC_STATE_RUNNING;

			break;

		/* enter running state */
		case VDC_STATE_RUNNING:
			/*
			 * Signal anyone waiting for the connection
			 * to come on line.
			 */
			vdcp->hshake_cnt = 0;
			cv_broadcast(&vdcp->running_cv);
			mutex_exit(&vdcp->lock);

			for (;;) {
				vio_msg_t msg;
				status = vdc_wait_for_response(vdcp, &msg);
				if (status) break;

				DMSG(vdcp, 1, "[%d] new pkt(s) available\n",
				    vdcp->instance);
				status = vdc_process_data_msg(vdcp, &msg);
				if (status) {
					DMSG(vdcp, 1, "[%d] process_data_msg "
					    "returned err=%d\n", vdcp->instance,
					    status);
					break;
				}

			}

			mutex_enter(&vdcp->lock);

			vdcp->state = VDC_STATE_RESETTING;
			break;

		case VDC_STATE_RESETTING:
			DMSG(vdcp, 0, "Initiating channel reset "
			    "(pending = %d)\n", (int)vdcp->threads_pending);

			if (vdcp->self_reset) {
				DMSG(vdcp, 0,
				    "[%d] calling stop_ldc_connection.\n",
				    vdcp->instance);
				status = vdc_stop_ldc_connection(vdcp);
				vdcp->self_reset = B_FALSE;
			}

			/*
			 * Wait for all threads currently waiting
			 * for a free dring entry to use.
			 */
			while (vdcp->threads_pending) {
				cv_broadcast(&vdcp->membind_cv);
				cv_broadcast(&vdcp->dring_free_cv);
				mutex_exit(&vdcp->lock);
				/* let them wake up */
				drv_usecwait(vdc_min_timeout_ldc);
				mutex_enter(&vdcp->lock);
			}

			ASSERT(vdcp->threads_pending == 0);

			/* Sanity check that no thread is receiving */
			ASSERT(vdcp->read_state != VDC_READ_WAITING);

			vdcp->read_state = VDC_READ_IDLE;

			/* preserve in-flight transactions for resubmission */
			vdc_backup_local_dring(vdcp);

			/* cleanup the old d-ring */
			vdc_destroy_descriptor_ring(vdcp);

			/* go and start again */
			vdcp->state = VDC_STATE_INIT;

			break;

		case VDC_STATE_DETACH:
			DMSG(vdcp, 0, "[%d] Reset thread exit cleanup ..\n",
			    vdcp->instance);

			/* unblock any thread stuck in a synchronous op */
			while (vdcp->sync_op_pending) {
				cv_signal(&vdcp->sync_pending_cv);
				cv_signal(&vdcp->sync_blocked_cv);
				mutex_exit(&vdcp->lock);
				drv_usecwait(vdc_min_timeout_ldc);
				mutex_enter(&vdcp->lock);
			}

			cv_signal(&vdcp->running_cv);
			mutex_exit(&vdcp->lock);

			DMSG(vdcp, 0, "[%d] Msg processing thread exiting ..\n",
			    vdcp->instance);
			thread_exit();
			break;
		}
	}
}


/*
 * Function:
 *	vdc_process_data_msg()
 *
 * Description:
 *	This function is called by the message processing thread each time
 *	a message with a msgtype of VIO_TYPE_DATA is received. It will either
 *	be an ACK or NACK from vds[1] which vdc handles as follows.
 *		ACK - wake up the waiting thread
 *		NACK - resend any messages necessary
 *
 *	[1] Although the message format allows it, vds should not send a
 *	    VIO_SUBTYPE_INFO message to vdc asking it to read data; if for
 *	    some bizarre reason it does, vdc will reset the connection.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
 *	msg	- the LDC message sent by vds
 *
 * Return Code:
 *	0	- Success.
 *	> 0	- error value returned by LDC
 */
static int
vdc_process_data_msg(vdc_t *vdcp, vio_msg_t *msg)
{
	int		status = 0;
	vio_dring_msg_t	*dring_msg;
	vdc_local_desc_t *ldep = NULL;
	int		start, end;
	int		idx;

	dring_msg = (vio_dring_msg_t *)msg;

	ASSERT(msg->tag.vio_msgtype == VIO_TYPE_DATA);
	ASSERT(vdcp != NULL);

	mutex_enter(&vdcp->lock);

	/*
	 * Check to see if the message has bogus data
	 *
	 * NOTE(review): 'start' is only range-checked against dring_len;
	 * a negative start_idx would not be rejected here - confirm the
	 * protocol guarantees start_idx >= 0 before indexing local_dring.
	 */
	idx = start = dring_msg->start_idx;
	end = dring_msg->end_idx;
	if ((start >= vdcp->dring_len) ||
	    (end >= vdcp->dring_len) || (end < -1)) {
		DMSG(vdcp, 0, "[%d] Bogus ACK data : start %d, end %d\n",
		    vdcp->instance, start, end);
		mutex_exit(&vdcp->lock);
		return (EINVAL);
	}

	/*
	 * Verify that the sequence number is what vdc expects.
	 */
	switch (vdc_verify_seq_num(vdcp, dring_msg)) {
	case VDC_SEQ_NUM_TODO:
		break;	/* keep processing this message */
	case VDC_SEQ_NUM_SKIP:
		mutex_exit(&vdcp->lock);
		return (0);
	case VDC_SEQ_NUM_INVALID:
		mutex_exit(&vdcp->lock);
		DMSG(vdcp, 0, "[%d] invalid seqno\n", vdcp->instance);
		return (ENXIO);
	}

	if (msg->tag.vio_subtype == VIO_SUBTYPE_NACK) {
		DMSG(vdcp, 0, "[%d] DATA NACK\n", vdcp->instance);
		VDC_DUMP_DRING_MSG(dring_msg);
		mutex_exit(&vdcp->lock);
		return (EIO);

	} else if (msg->tag.vio_subtype == VIO_SUBTYPE_INFO) {
		/* vds should never ask vdc to read data - see [1] above */
		mutex_exit(&vdcp->lock);
		return (EPROTO);
	}

	DTRACE_IO2(recv, vio_dring_msg_t, dring_msg, vdc_t *, vdcp);
	DMSG(vdcp, 1, ": start %d end %d\n", start, end);
	ASSERT(start == end);

	ldep = &vdcp->local_dring[idx];

	DMSG(vdcp, 1, ": state 0x%x - cb_type 0x%x\n",
	    ldep->dep->hdr.dstate, ldep->cb_type);

	if (ldep->dep->hdr.dstate == VIO_DESC_DONE) {
		struct buf *bufp;

		switch (ldep->cb_type) {
		case CB_SYNC:
			/* synchronous op: record status and wake the waiter */
			ASSERT(vdcp->sync_op_pending);

			status = vdc_depopulate_descriptor(vdcp, idx);
			vdcp->sync_op_status = status;
			vdcp->sync_op_pending = B_FALSE;
			cv_signal(&vdcp->sync_pending_cv);
			break;

		case CB_STRATEGY:
			/* async strategy(9E) I/O: complete the buf */
			bufp = ldep->cb_arg;
			ASSERT(bufp != NULL);
			status = ldep->dep->payload.status; /* Future:ntoh */
			if (status != 0) {
				DMSG(vdcp, 1, "strategy status=%d\n", status);
				bioerror(bufp, status);
			}
			status = vdc_depopulate_descriptor(vdcp, idx);
			biodone(bufp);
			break;

		default:
			ASSERT(0);
		}
	}

	/* let the arrival signal propogate */
	mutex_exit(&vdcp->lock);

	/* probe gives the count of how many entries were processed */
	DTRACE_IO2(processed, int, 1, vdc_t *, vdcp);

	return (0);
}

/*
 * Function:
 *	vdc_process_err_msg()
 *
 * NOTE: No error messages are used as part of the vDisk protocol
 */
static int
vdc_process_err_msg(vdc_t *vdc, vio_msg_t msg)
{
	_NOTE(ARGUNUSED(vdc))
	_NOTE(ARGUNUSED(msg))

	ASSERT(msg.tag.vio_msgtype == VIO_TYPE_ERR);
	DMSG(vdc, 1, "[%d] Got an ERR msg", vdc->instance);

	return (ENOTSUP);
}

/*
 * Function:
 *	vdc_handle_ver_msg()
 *
 * Description:
 *	Process a version negotiation (VIO_VER_INFO) message from vds.
 *
 * Arguments:
 *	vdc	- soft state pointer for this instance of the device driver.
3636 * ver_msg - LDC message sent by vDisk server 3637 * 3638 * Return Code: 3639 * 0 - Success 3640 */ 3641 static int 3642 vdc_handle_ver_msg(vdc_t *vdc, vio_ver_msg_t *ver_msg) 3643 { 3644 int status = 0; 3645 3646 ASSERT(vdc != NULL); 3647 ASSERT(mutex_owned(&vdc->lock)); 3648 3649 if (ver_msg->tag.vio_subtype_env != VIO_VER_INFO) { 3650 return (EPROTO); 3651 } 3652 3653 if (ver_msg->dev_class != VDEV_DISK_SERVER) { 3654 return (EINVAL); 3655 } 3656 3657 switch (ver_msg->tag.vio_subtype) { 3658 case VIO_SUBTYPE_ACK: 3659 /* 3660 * We check to see if the version returned is indeed supported 3661 * (The server may have also adjusted the minor number downwards 3662 * and if so 'ver_msg' will contain the actual version agreed) 3663 */ 3664 if (vdc_is_supported_version(ver_msg)) { 3665 vdc->ver.major = ver_msg->ver_major; 3666 vdc->ver.minor = ver_msg->ver_minor; 3667 ASSERT(vdc->ver.major > 0); 3668 } else { 3669 status = EPROTO; 3670 } 3671 break; 3672 3673 case VIO_SUBTYPE_NACK: 3674 /* 3675 * call vdc_is_supported_version() which will return the next 3676 * supported version (if any) in 'ver_msg' 3677 */ 3678 (void) vdc_is_supported_version(ver_msg); 3679 if (ver_msg->ver_major > 0) { 3680 size_t len = sizeof (*ver_msg); 3681 3682 ASSERT(vdc->ver.major > 0); 3683 3684 /* reset the necessary fields and resend */ 3685 ver_msg->tag.vio_subtype = VIO_SUBTYPE_INFO; 3686 ver_msg->dev_class = VDEV_DISK; 3687 3688 status = vdc_send(vdc, (caddr_t)ver_msg, &len); 3689 DMSG(vdc, 0, "[%d] Resend VER info (LDC status = %d)\n", 3690 vdc->instance, status); 3691 if (len != sizeof (*ver_msg)) 3692 status = EBADMSG; 3693 } else { 3694 DMSG(vdc, 0, "[%d] No common version with " 3695 "vDisk server", vdc->instance); 3696 status = ENOTSUP; 3697 } 3698 3699 break; 3700 case VIO_SUBTYPE_INFO: 3701 /* 3702 * Handle the case where vds starts handshake 3703 * (for now only vdc is the instigatior) 3704 */ 3705 status = ENOTSUP; 3706 break; 3707 3708 default: 3709 status = EINVAL; 3710 
break; 3711 } 3712 3713 return (status); 3714 } 3715 3716 /* 3717 * Function: 3718 * vdc_handle_attr_msg() 3719 * 3720 * Description: 3721 * 3722 * Arguments: 3723 * vdc - soft state pointer for this instance of the device driver. 3724 * attr_msg - LDC message sent by vDisk server 3725 * 3726 * Return Code: 3727 * 0 - Success 3728 */ 3729 static int 3730 vdc_handle_attr_msg(vdc_t *vdc, vd_attr_msg_t *attr_msg) 3731 { 3732 int status = 0; 3733 3734 ASSERT(vdc != NULL); 3735 ASSERT(mutex_owned(&vdc->lock)); 3736 3737 if (attr_msg->tag.vio_subtype_env != VIO_ATTR_INFO) { 3738 return (EPROTO); 3739 } 3740 3741 switch (attr_msg->tag.vio_subtype) { 3742 case VIO_SUBTYPE_ACK: 3743 /* 3744 * We now verify the attributes sent by vds. 3745 */ 3746 vdc->vdisk_size = attr_msg->vdisk_size; 3747 vdc->vdisk_type = attr_msg->vdisk_type; 3748 3749 DMSG(vdc, 0, "[%d] max_xfer_sz: sent %lx acked %lx\n", 3750 vdc->instance, vdc->max_xfer_sz, attr_msg->max_xfer_sz); 3751 DMSG(vdc, 0, "[%d] vdisk_block_size: sent %lx acked %x\n", 3752 vdc->instance, vdc->block_size, 3753 attr_msg->vdisk_block_size); 3754 3755 /* 3756 * We don't know at compile time what the vDisk server will 3757 * think are good values but we apply an large (arbitrary) 3758 * upper bound to prevent memory exhaustion in vdc if it was 3759 * allocating a DRing based of huge values sent by the server. 3760 * We probably will never exceed this except if the message 3761 * was garbage. 
3762 */ 3763 if ((attr_msg->max_xfer_sz * attr_msg->vdisk_block_size) <= 3764 (PAGESIZE * DEV_BSIZE)) { 3765 vdc->max_xfer_sz = attr_msg->max_xfer_sz; 3766 vdc->block_size = attr_msg->vdisk_block_size; 3767 } else { 3768 DMSG(vdc, 0, "[%d] vds block transfer size too big;" 3769 " using max supported by vdc", vdc->instance); 3770 } 3771 3772 if ((attr_msg->xfer_mode != VIO_DRING_MODE) || 3773 (attr_msg->vdisk_size > INT64_MAX) || 3774 (attr_msg->vdisk_type > VD_DISK_TYPE_DISK)) { 3775 DMSG(vdc, 0, "[%d] Invalid attributes from vds", 3776 vdc->instance); 3777 status = EINVAL; 3778 break; 3779 } 3780 3781 break; 3782 3783 case VIO_SUBTYPE_NACK: 3784 /* 3785 * vds could not handle the attributes we sent so we 3786 * stop negotiating. 3787 */ 3788 status = EPROTO; 3789 break; 3790 3791 case VIO_SUBTYPE_INFO: 3792 /* 3793 * Handle the case where vds starts the handshake 3794 * (for now; vdc is the only supported instigatior) 3795 */ 3796 status = ENOTSUP; 3797 break; 3798 3799 default: 3800 status = ENOTSUP; 3801 break; 3802 } 3803 3804 return (status); 3805 } 3806 3807 /* 3808 * Function: 3809 * vdc_handle_dring_reg_msg() 3810 * 3811 * Description: 3812 * 3813 * Arguments: 3814 * vdc - soft state pointer for this instance of the driver. 
3815 * dring_msg - LDC message sent by vDisk server 3816 * 3817 * Return Code: 3818 * 0 - Success 3819 */ 3820 static int 3821 vdc_handle_dring_reg_msg(vdc_t *vdc, vio_dring_reg_msg_t *dring_msg) 3822 { 3823 int status = 0; 3824 3825 ASSERT(vdc != NULL); 3826 ASSERT(mutex_owned(&vdc->lock)); 3827 3828 if (dring_msg->tag.vio_subtype_env != VIO_DRING_REG) { 3829 return (EPROTO); 3830 } 3831 3832 switch (dring_msg->tag.vio_subtype) { 3833 case VIO_SUBTYPE_ACK: 3834 /* save the received dring_ident */ 3835 vdc->dring_ident = dring_msg->dring_ident; 3836 DMSG(vdc, 0, "[%d] Received dring ident=0x%lx\n", 3837 vdc->instance, vdc->dring_ident); 3838 break; 3839 3840 case VIO_SUBTYPE_NACK: 3841 /* 3842 * vds could not handle the DRing info we sent so we 3843 * stop negotiating. 3844 */ 3845 DMSG(vdc, 0, "[%d] server could not register DRing\n", 3846 vdc->instance); 3847 status = EPROTO; 3848 break; 3849 3850 case VIO_SUBTYPE_INFO: 3851 /* 3852 * Handle the case where vds starts handshake 3853 * (for now only vdc is the instigatior) 3854 */ 3855 status = ENOTSUP; 3856 break; 3857 default: 3858 status = ENOTSUP; 3859 } 3860 3861 return (status); 3862 } 3863 3864 /* 3865 * Function: 3866 * vdc_verify_seq_num() 3867 * 3868 * Description: 3869 * This functions verifies that the sequence number sent back by the vDisk 3870 * server with the latest message is what is expected (i.e. it is greater 3871 * than the last seq num sent by the vDisk server and less than or equal 3872 * to the last seq num generated by vdc). 3873 * 3874 * It then checks the request ID to see if any requests need processing 3875 * in the DRing. 3876 * 3877 * Arguments: 3878 * vdc - soft state pointer for this instance of the driver. 
3879 * dring_msg - pointer to the LDC message sent by vds 3880 * 3881 * Return Code: 3882 * VDC_SEQ_NUM_TODO - Message needs to be processed 3883 * VDC_SEQ_NUM_SKIP - Message has already been processed 3884 * VDC_SEQ_NUM_INVALID - The seq numbers are so out of sync, 3885 * vdc cannot deal with them 3886 */ 3887 static int 3888 vdc_verify_seq_num(vdc_t *vdc, vio_dring_msg_t *dring_msg) 3889 { 3890 ASSERT(vdc != NULL); 3891 ASSERT(dring_msg != NULL); 3892 ASSERT(mutex_owned(&vdc->lock)); 3893 3894 /* 3895 * Check to see if the messages were responded to in the correct 3896 * order by vds. 3897 */ 3898 if ((dring_msg->seq_num <= vdc->seq_num_reply) || 3899 (dring_msg->seq_num > vdc->seq_num)) { 3900 DMSG(vdc, 0, "?[%d] Bogus sequence_number %lu: " 3901 "%lu > expected <= %lu (last proc req %lu sent %lu)\n", 3902 vdc->instance, dring_msg->seq_num, 3903 vdc->seq_num_reply, vdc->seq_num, 3904 vdc->req_id_proc, vdc->req_id); 3905 return (VDC_SEQ_NUM_INVALID); 3906 } 3907 vdc->seq_num_reply = dring_msg->seq_num; 3908 3909 if (vdc->req_id_proc < vdc->req_id) 3910 return (VDC_SEQ_NUM_TODO); 3911 else 3912 return (VDC_SEQ_NUM_SKIP); 3913 } 3914 3915 3916 /* 3917 * Function: 3918 * vdc_is_supported_version() 3919 * 3920 * Description: 3921 * This routine checks if the major/minor version numbers specified in 3922 * 'ver_msg' are supported. 
 *	If not it finds the next version that is
 *	in the supported version list 'vdc_version[]' and sets the fields in
 *	'ver_msg' to those values
 *
 * Arguments:
 *	ver_msg	- LDC message sent by vDisk server
 *
 * Return Code:
 *	B_TRUE	- Success
 *	B_FALSE	- Version not supported
 */
static boolean_t
vdc_is_supported_version(vio_ver_msg_t *ver_msg)
{
	int vdc_num_versions = sizeof (vdc_version) / sizeof (vdc_version[0]);

	/* vdc_version[] is sorted by strictly descending major number */
	for (int i = 0; i < vdc_num_versions; i++) {
		ASSERT(vdc_version[i].major > 0);
		ASSERT((i == 0) ||
		    (vdc_version[i].major < vdc_version[i-1].major));

		/*
		 * If the major versions match, adjust the minor version, if
		 * necessary, down to the highest value supported by this
		 * client. The server should support all minor versions lower
		 * than the value it sent
		 */
		if (ver_msg->ver_major == vdc_version[i].major) {
			if (ver_msg->ver_minor > vdc_version[i].minor) {
				DMSGX(0,
				    "Adjusting minor version from %u to %u",
				    ver_msg->ver_minor, vdc_version[i].minor);
				ver_msg->ver_minor = vdc_version[i].minor;
			}
			return (B_TRUE);
		}

		/*
		 * If the message contains a higher major version number, set
		 * the message's major/minor versions to the current values
		 * and return false, so this message will get resent with
		 * these values, and the server will potentially try again
		 * with the same or a lower version
		 */
		if (ver_msg->ver_major > vdc_version[i].major) {
			ver_msg->ver_major = vdc_version[i].major;
			ver_msg->ver_minor = vdc_version[i].minor;
			DMSGX(0, "Suggesting major/minor (0x%x/0x%x)\n",
			    ver_msg->ver_major, ver_msg->ver_minor);

			return (B_FALSE);
		}

		/*
		 * Otherwise, the message's major version is less than the
		 * current major version, so continue the loop to the next
		 * (lower) supported version
		 */
	}

	/*
	 * No common version was found; "ground" the version pair in the
	 * message to terminate negotiation
	 */
	ver_msg->ver_major = 0;
	ver_msg->ver_minor = 0;

	return (B_FALSE);
}
/* -------------------------------------------------------------------------- */

/*
 * DKIO(7) support
 */

/* argument bundle dispatched to the DKIOCFLUSHWRITECACHE taskq callback */
typedef struct vdc_dk_arg {
	struct dk_callback	dkc;	/* caller's completion callback */
	int			mode;	/* ioctl mode flags */
	dev_t			dev;	/* device the flush was issued on */
	vdc_t			*vdc;	/* owning vdc instance */
} vdc_dk_arg_t;

/*
 * Function:
 * 	vdc_dkio_flush_cb()
 *
 * Description:
 *	This routine is a callback for DKIOCFLUSHWRITECACHE which can be called
 *	by kernel code.
 *
 * Arguments:
 *	arg	- a pointer to a vdc_dk_arg_t structure.
 */
void
vdc_dkio_flush_cb(void *arg)
{
	struct vdc_dk_arg	*dk_arg = (struct vdc_dk_arg *)arg;
	struct dk_callback	*dkc = NULL;
	vdc_t			*vdc = NULL;
	int			rv;

	if (dk_arg == NULL) {
		cmn_err(CE_NOTE, "?[Unk] DKIOCFLUSHWRITECACHE arg is NULL\n");
		return;
	}
	dkc = &dk_arg->dkc;
	vdc = dk_arg->vdc;
	ASSERT(vdc != NULL);

	/* issue the flush synchronously to the vDisk server */
	rv = vdc_do_sync_op(vdc, VD_OP_FLUSH, NULL, 0,
	    VDCPART(dk_arg->dev), 0, CB_SYNC, 0, VIO_both_dir);
	if (rv != 0) {
		DMSG(vdc, 0, "[%d] DKIOCFLUSHWRITECACHE failed %d : model %x\n",
		    vdc->instance, rv,
		    ddi_model_convert_from(dk_arg->mode & FMODELS));
	}

	/*
	 * Trigger the call back to notify the caller that the ioctl call has
	 * been completed.
	 */
	if ((dk_arg->mode & FKIOCTL) &&
	    (dkc != NULL) &&
	    (dkc->dkc_callback != NULL)) {
		ASSERT(dkc->dkc_cookie != NULL);
		(*dkc->dkc_callback)(dkc->dkc_cookie, rv);
	}

	/* Indicate that one less DKIO write flush is outstanding */
	mutex_enter(&vdc->lock);
	vdc->dkio_flush_pending--;
	ASSERT(vdc->dkio_flush_pending >= 0);
	mutex_exit(&vdc->lock);

	/* free the mem that was allocated when the callback was dispatched */
	kmem_free(arg, sizeof (vdc_dk_arg_t));
}

/*
 * This structure is used in the DKIO(7I) array below.
 */
typedef struct vdc_dk_ioctl {
	uint8_t		op;		/* VD_OP_XXX value */
	int		cmd;		/* Solaris ioctl operation number */
	size_t		nbytes;		/* size of structure to be copied */

	/* function to convert between vDisk and Solaris structure formats */
	int	(*convert)(vdc_t *vdc, void *vd_buf, void *ioctl_arg,
	    int mode, int dir);
} vdc_dk_ioctl_t;

/*
 * Subset of DKIO(7I) operations currently supported
 */
static vdc_dk_ioctl_t	dk_ioctl[] = {
	{VD_OP_FLUSH,		DKIOCFLUSHWRITECACHE,	sizeof (int),
		vdc_null_copy_func},
	{VD_OP_GET_WCE,		DKIOCGETWCE,		sizeof (int),
		vdc_get_wce_convert},
	{VD_OP_SET_WCE,		DKIOCSETWCE,		sizeof (int),
		vdc_set_wce_convert},
	{VD_OP_GET_VTOC,	DKIOCGVTOC,		sizeof (vd_vtoc_t),
		vdc_get_vtoc_convert},
	{VD_OP_SET_VTOC,	DKIOCSVTOC,		sizeof (vd_vtoc_t),
		vdc_set_vtoc_convert},
	{VD_OP_GET_DISKGEOM,	DKIOCGGEOM,		sizeof (vd_geom_t),
		vdc_get_geom_convert},
	{VD_OP_GET_DISKGEOM,	DKIOCG_PHYGEOM,		sizeof (vd_geom_t),
		vdc_get_geom_convert},
	{VD_OP_GET_DISKGEOM,	DKIOCG_VIRTGEOM,	sizeof (vd_geom_t),
		vdc_get_geom_convert},
	{VD_OP_SET_DISKGEOM,	DKIOCSGEOM,		sizeof (vd_geom_t),
		vdc_set_geom_convert},
	{VD_OP_GET_EFI,		DKIOCGETEFI,		0,
		vdc_get_efi_convert},
	{VD_OP_SET_EFI,		DKIOCSETEFI,		0,
		vdc_set_efi_convert},

	/*
	 * These particular ioctls are not sent to the server - vdc fakes up
	 * the necessary info.
	 */
	{0, DKIOCINFO, sizeof (struct dk_cinfo), vdc_null_copy_func},
	{0, DKIOCGMEDIAINFO, sizeof (struct dk_minfo), vdc_null_copy_func},
	{0, USCSICMD, sizeof (struct uscsi_cmd), vdc_null_copy_func},
	{0, DKIOCREMOVABLE, 0, vdc_null_copy_func},
	{0, CDROMREADOFFSET, 0, vdc_null_copy_func}
};

/*
 * Function:
 *	vd_process_ioctl()
 *
 * Description:
 *	This routine processes disk specific ioctl calls
 *
 * Arguments:
 *	dev	- the device number
 *	cmd	- the operation [dkio(7I)] to be processed
 *	arg	- pointer to user provided structure
 *		  (contains data to be set or reference parameter for get)
 *	mode	- bit flag, indicating open settings, 32/64 bit type, etc
 *
 * Return Code:
 *	0
 *	EFAULT
 *	ENXIO
 *	EIO
 *	ENOTSUP
 */
static int
vd_process_ioctl(dev_t dev, int cmd, caddr_t arg, int mode)
{
	int		instance = VDCUNIT(dev);
	vdc_t		*vdc = NULL;
	int		rv = -1;
	int		idx = 0;	/* index into dk_ioctl[] */
	size_t		len = 0;	/* #bytes to send to vds */
	size_t		alloc_len = 0;	/* #bytes to allocate mem for */
	caddr_t		mem_p = NULL;
	size_t		nioctls = (sizeof (dk_ioctl)) / (sizeof (dk_ioctl[0]));
	struct vtoc	vtoc_saved;
	vdc_dk_ioctl_t	*iop;

	vdc = ddi_get_soft_state(vdc_state, instance);
	if (vdc == NULL) {
		cmn_err(CE_NOTE, "![%d] Could not get soft state structure",
		    instance);
		return (ENXIO);
	}

	DMSG(vdc, 0, "[%d] Processing ioctl(%x) for dev %lx : model %x\n",
	    instance, cmd, dev, ddi_model_convert_from(mode & FMODELS));

	/*
	 * Validate the ioctl operation to be performed.
	 *
	 * If we have looped through the array without finding a match then we
	 * don't support this ioctl.
4161 */ 4162 for (idx = 0; idx < nioctls; idx++) { 4163 if (cmd == dk_ioctl[idx].cmd) 4164 break; 4165 } 4166 4167 if (idx >= nioctls) { 4168 DMSG(vdc, 0, "[%d] Unsupported ioctl (0x%x)\n", 4169 vdc->instance, cmd); 4170 return (ENOTSUP); 4171 } 4172 4173 iop = &(dk_ioctl[idx]); 4174 4175 if (cmd == DKIOCGETEFI || cmd == DKIOCSETEFI) { 4176 /* size is not fixed for EFI ioctls, it depends on ioctl arg */ 4177 dk_efi_t dk_efi; 4178 4179 rv = ddi_copyin(arg, &dk_efi, sizeof (dk_efi_t), mode); 4180 if (rv != 0) 4181 return (EFAULT); 4182 4183 len = sizeof (vd_efi_t) - 1 + dk_efi.dki_length; 4184 } else { 4185 len = iop->nbytes; 4186 } 4187 4188 /* 4189 * Deal with the ioctls which the server does not provide. vdc can 4190 * fake these up and return immediately 4191 */ 4192 switch (cmd) { 4193 case CDROMREADOFFSET: 4194 case DKIOCREMOVABLE: 4195 case USCSICMD: 4196 return (ENOTTY); 4197 4198 case DKIOCINFO: 4199 { 4200 struct dk_cinfo cinfo; 4201 if (vdc->cinfo == NULL) 4202 return (ENXIO); 4203 4204 bcopy(vdc->cinfo, &cinfo, sizeof (struct dk_cinfo)); 4205 cinfo.dki_partition = VDCPART(dev); 4206 4207 rv = ddi_copyout(&cinfo, (void *)arg, 4208 sizeof (struct dk_cinfo), mode); 4209 if (rv != 0) 4210 return (EFAULT); 4211 4212 return (0); 4213 } 4214 4215 case DKIOCGMEDIAINFO: 4216 { 4217 if (vdc->minfo == NULL) 4218 return (ENXIO); 4219 4220 rv = ddi_copyout(vdc->minfo, (void *)arg, 4221 sizeof (struct dk_minfo), mode); 4222 if (rv != 0) 4223 return (EFAULT); 4224 4225 return (0); 4226 } 4227 4228 case DKIOCFLUSHWRITECACHE: 4229 { 4230 struct dk_callback *dkc = (struct dk_callback *)arg; 4231 vdc_dk_arg_t *dkarg = NULL; 4232 4233 DMSG(vdc, 1, "[%d] Flush W$: mode %x\n", 4234 instance, mode); 4235 4236 /* 4237 * If the backing device is not a 'real' disk then the 4238 * W$ operation request to the vDisk server will fail 4239 * so we might as well save the cycles and return now. 
4240 */ 4241 if (vdc->vdisk_type != VD_DISK_TYPE_DISK) 4242 return (ENOTTY); 4243 4244 /* 4245 * If arg is NULL, then there is no callback function 4246 * registered and the call operates synchronously; we 4247 * break and continue with the rest of the function and 4248 * wait for vds to return (i.e. after the request to 4249 * vds returns successfully, all writes completed prior 4250 * to the ioctl will have been flushed from the disk 4251 * write cache to persistent media. 4252 * 4253 * If a callback function is registered, we dispatch 4254 * the request on a task queue and return immediately. 4255 * The callback will deal with informing the calling 4256 * thread that the flush request is completed. 4257 */ 4258 if (dkc == NULL) 4259 break; 4260 4261 dkarg = kmem_zalloc(sizeof (vdc_dk_arg_t), KM_SLEEP); 4262 4263 dkarg->mode = mode; 4264 dkarg->dev = dev; 4265 bcopy(dkc, &dkarg->dkc, sizeof (*dkc)); 4266 4267 mutex_enter(&vdc->lock); 4268 vdc->dkio_flush_pending++; 4269 dkarg->vdc = vdc; 4270 mutex_exit(&vdc->lock); 4271 4272 /* put the request on a task queue */ 4273 rv = taskq_dispatch(system_taskq, vdc_dkio_flush_cb, 4274 (void *)dkarg, DDI_SLEEP); 4275 if (rv == NULL) { 4276 /* clean up if dispatch fails */ 4277 mutex_enter(&vdc->lock); 4278 vdc->dkio_flush_pending--; 4279 kmem_free(dkarg, sizeof (vdc_dk_arg_t)); 4280 } 4281 4282 return (rv == NULL ? ENOMEM : 0); 4283 } 4284 } 4285 4286 /* catch programming error in vdc - should be a VD_OP_XXX ioctl */ 4287 ASSERT(iop->op != 0); 4288 4289 /* LDC requires that the memory being mapped is 8-byte aligned */ 4290 alloc_len = P2ROUNDUP(len, sizeof (uint64_t)); 4291 DMSG(vdc, 1, "[%d] struct size %ld alloc %ld\n", 4292 instance, len, alloc_len); 4293 4294 ASSERT(alloc_len != 0); /* sanity check */ 4295 mem_p = kmem_zalloc(alloc_len, KM_SLEEP); 4296 4297 if (cmd == DKIOCSVTOC) { 4298 /* 4299 * Save a copy of the current VTOC so that we can roll back 4300 * if the setting of the new VTOC fails. 
4301 */ 4302 bcopy(vdc->vtoc, &vtoc_saved, sizeof (struct vtoc)); 4303 } 4304 4305 /* 4306 * Call the conversion function for this ioctl whhich if necessary 4307 * converts from the Solaris format to the format ARC'ed 4308 * as part of the vDisk protocol (FWARC 2006/195) 4309 */ 4310 ASSERT(iop->convert != NULL); 4311 rv = (iop->convert)(vdc, arg, mem_p, mode, VD_COPYIN); 4312 if (rv != 0) { 4313 DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n", 4314 instance, rv, cmd); 4315 if (mem_p != NULL) 4316 kmem_free(mem_p, alloc_len); 4317 return (rv); 4318 } 4319 4320 /* 4321 * send request to vds to service the ioctl. 4322 */ 4323 rv = vdc_do_sync_op(vdc, iop->op, mem_p, alloc_len, 4324 VDCPART(dev), 0, CB_SYNC, (void *)(uint64_t)mode, 4325 VIO_both_dir); 4326 4327 if (rv != 0) { 4328 /* 4329 * This is not necessarily an error. The ioctl could 4330 * be returning a value such as ENOTTY to indicate 4331 * that the ioctl is not applicable. 4332 */ 4333 DMSG(vdc, 0, "[%d] vds returned %d for ioctl 0x%x\n", 4334 instance, rv, cmd); 4335 if (mem_p != NULL) 4336 kmem_free(mem_p, alloc_len); 4337 4338 if (cmd == DKIOCSVTOC) { 4339 /* update of the VTOC has failed, roll back */ 4340 bcopy(&vtoc_saved, vdc->vtoc, sizeof (struct vtoc)); 4341 } 4342 4343 return (rv); 4344 } 4345 4346 if (cmd == DKIOCSVTOC) { 4347 /* 4348 * The VTOC has been changed. We need to update the device 4349 * nodes to handle the case where an EFI label has been 4350 * changed to a VTOC label. We also try and update the device 4351 * node properties. Failing to set the properties should 4352 * not cause an error to be return the caller though. 4353 */ 4354 vdc->vdisk_label = VD_DISK_LABEL_VTOC; 4355 (void) vdc_create_device_nodes_vtoc(vdc); 4356 4357 if (vdc_create_device_nodes_props(vdc)) { 4358 DMSG(vdc, 0, "![%d] Failed to update device nodes" 4359 " properties", vdc->instance); 4360 } 4361 4362 } else if (cmd == DKIOCSETEFI) { 4363 /* 4364 * The EFI has been changed. 
		 * We need to update the device
		 * nodes to handle the case where a VTOC label has been
		 * changed to an EFI label. We also try and update the device
		 * node properties. Failing to set the properties should
		 * not cause an error to be return the caller though.
		 */
		struct dk_gpt *efi;
		size_t efi_len;

		vdc->vdisk_label = VD_DISK_LABEL_EFI;
		(void) vdc_create_device_nodes_efi(vdc);

		/* re-read the label so the node properties can be refreshed */
		rv = vdc_efi_alloc_and_read(dev, &efi, &efi_len);

		if (rv == 0) {
			vdc_store_efi(vdc, efi);
			rv = vdc_create_device_nodes_props(vdc);
			vd_efi_free(efi, efi_len);
		}

		if (rv) {
			DMSG(vdc, 0, "![%d] Failed to update device nodes"
			    " properties", vdc->instance);
		}
	}

	/*
	 * Call the conversion function (if it exists) for this ioctl
	 * which converts from the format ARC'ed as part of the vDisk
	 * protocol (FWARC 2006/195) back to a format understood by
	 * the rest of Solaris.
	 */
	rv = (iop->convert)(vdc, mem_p, arg, mode, VD_COPYOUT);
	if (rv != 0) {
		DMSG(vdc, 0, "[%d] convert func returned %d for ioctl 0x%x\n",
		    instance, rv, cmd);
		if (mem_p != NULL)
			kmem_free(mem_p, alloc_len);
		return (rv);
	}

	if (mem_p != NULL)
		kmem_free(mem_p, alloc_len);

	return (rv);
}

/*
 * Function:
 *	vdc_null_copy_func()
 *
 * Description:
 *	This is an empty conversion function used by ioctl calls which
 *	do not need to convert the data being passed in/out to userland
 */
static int
vdc_null_copy_func(vdc_t *vdc, void *from, void *to, int mode, int dir)
{
	_NOTE(ARGUNUSED(vdc))
	_NOTE(ARGUNUSED(from))
	_NOTE(ARGUNUSED(to))
	_NOTE(ARGUNUSED(mode))
	_NOTE(ARGUNUSED(dir))

	return (0);
}

/*
 * Copy out the write-cache-enable flag (DKIOCGETWCE) to userland;
 * nothing to do on the inbound leg of the ioctl.
 */
static int
vdc_get_wce_convert(vdc_t *vdc, void *from, void *to,
    int mode, int dir)
{
	_NOTE(ARGUNUSED(vdc))

	if (dir == VD_COPYIN)
		return (0);		/* nothing to do */

	if (ddi_copyout(from, to, sizeof (int), mode) != 0)
		return (EFAULT);

	return (0);
}

/*
 * Copy in the write-cache-enable flag (DKIOCSETWCE) from userland;
 * nothing to do on the outbound leg of the ioctl.
 */
static int
vdc_set_wce_convert(vdc_t *vdc, void *from, void *to,
    int mode, int dir)
{
	_NOTE(ARGUNUSED(vdc))

	if (dir == VD_COPYOUT)
		return (0);		/* nothing to do */

	if (ddi_copyin(from, to, sizeof (int), mode) != 0)
		return (EFAULT);

	return (0);
}

/*
 * Function:
 *	vdc_get_vtoc_convert()
 *
 * Description:
 *	This routine performs the necessary convertions from the DKIOCGVTOC
 *	Solaris structure to the format defined in FWARC 2006/195.
 *
 *	In the struct vtoc definition, the timestamp field is marked as not
 *	supported so it is not part of vDisk protocol (FWARC 2006/195).
 *	However SVM uses that field to check it can write into the VTOC,
 *	so we fake up the info of that field.
 *
 * Arguments:
 *	vdc	- the vDisk client
 *	from	- the buffer containing the data to be copied from
 *	to	- the buffer to be copied to
 *	mode	- flags passed to ioctl() call
 *	dir	- the "direction" of the copy - VD_COPYIN or VD_COPYOUT
 *
 * Return Code:
 *	0	- Success
 *	ENXIO	- incorrect buffer passed in.
 *	EFAULT	- ddi_copyout routine encountered an error.
4484 */ 4485 static int 4486 vdc_get_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 4487 { 4488 int i; 4489 void *tmp_mem = NULL; 4490 void *tmp_memp; 4491 struct vtoc vt; 4492 struct vtoc32 vt32; 4493 int copy_len = 0; 4494 int rv = 0; 4495 4496 if (dir != VD_COPYOUT) 4497 return (0); /* nothing to do */ 4498 4499 if ((from == NULL) || (to == NULL)) 4500 return (ENXIO); 4501 4502 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) 4503 copy_len = sizeof (struct vtoc32); 4504 else 4505 copy_len = sizeof (struct vtoc); 4506 4507 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 4508 4509 VD_VTOC2VTOC((vd_vtoc_t *)from, &vt); 4510 4511 /* fake the VTOC timestamp field */ 4512 for (i = 0; i < V_NUMPAR; i++) { 4513 vt.timestamp[i] = vdc->vtoc->timestamp[i]; 4514 } 4515 4516 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 4517 vtoctovtoc32(vt, vt32); 4518 tmp_memp = &vt32; 4519 } else { 4520 tmp_memp = &vt; 4521 } 4522 rv = ddi_copyout(tmp_memp, to, copy_len, mode); 4523 if (rv != 0) 4524 rv = EFAULT; 4525 4526 kmem_free(tmp_mem, copy_len); 4527 return (rv); 4528 } 4529 4530 /* 4531 * Function: 4532 * vdc_set_vtoc_convert() 4533 * 4534 * Description: 4535 * This routine performs the necessary convertions from the DKIOCSVTOC 4536 * Solaris structure to the format defined in FWARC 2006/195. 
4537 * 4538 * Arguments: 4539 * vdc - the vDisk client 4540 * from - Buffer with data 4541 * to - Buffer where data is to be copied to 4542 * mode - flags passed to ioctl 4543 * dir - direction of copy (in or out) 4544 * 4545 * Return Code: 4546 * 0 - Success 4547 * ENXIO - Invalid buffer passed in 4548 * EFAULT - ddi_copyin of data failed 4549 */ 4550 static int 4551 vdc_set_vtoc_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 4552 { 4553 void *tmp_mem = NULL; 4554 struct vtoc vt; 4555 struct vtoc *vtp = &vt; 4556 vd_vtoc_t vtvd; 4557 int copy_len = 0; 4558 int rv = 0; 4559 4560 if (dir != VD_COPYIN) 4561 return (0); /* nothing to do */ 4562 4563 if ((from == NULL) || (to == NULL)) 4564 return (ENXIO); 4565 4566 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) 4567 copy_len = sizeof (struct vtoc32); 4568 else 4569 copy_len = sizeof (struct vtoc); 4570 4571 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 4572 4573 rv = ddi_copyin(from, tmp_mem, copy_len, mode); 4574 if (rv != 0) { 4575 kmem_free(tmp_mem, copy_len); 4576 return (EFAULT); 4577 } 4578 4579 if (ddi_model_convert_from(mode & FMODELS) == DDI_MODEL_ILP32) { 4580 vtoc32tovtoc((*(struct vtoc32 *)tmp_mem), vt); 4581 } else { 4582 vtp = tmp_mem; 4583 } 4584 4585 /* 4586 * The VTOC is being changed, then vdc needs to update the copy 4587 * it saved in the soft state structure. 
4588 */ 4589 bcopy(vtp, vdc->vtoc, sizeof (struct vtoc)); 4590 4591 VTOC2VD_VTOC(vtp, &vtvd); 4592 bcopy(&vtvd, to, sizeof (vd_vtoc_t)); 4593 kmem_free(tmp_mem, copy_len); 4594 4595 return (0); 4596 } 4597 4598 /* 4599 * Function: 4600 * vdc_get_geom_convert() 4601 * 4602 * Description: 4603 * This routine performs the necessary convertions from the DKIOCGGEOM, 4604 * DKIOCG_PHYSGEOM and DKIOG_VIRTGEOM Solaris structures to the format 4605 * defined in FWARC 2006/195 4606 * 4607 * Arguments: 4608 * vdc - the vDisk client 4609 * from - Buffer with data 4610 * to - Buffer where data is to be copied to 4611 * mode - flags passed to ioctl 4612 * dir - direction of copy (in or out) 4613 * 4614 * Return Code: 4615 * 0 - Success 4616 * ENXIO - Invalid buffer passed in 4617 * EFAULT - ddi_copyout of data failed 4618 */ 4619 static int 4620 vdc_get_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 4621 { 4622 _NOTE(ARGUNUSED(vdc)) 4623 4624 struct dk_geom geom; 4625 int copy_len = sizeof (struct dk_geom); 4626 int rv = 0; 4627 4628 if (dir != VD_COPYOUT) 4629 return (0); /* nothing to do */ 4630 4631 if ((from == NULL) || (to == NULL)) 4632 return (ENXIO); 4633 4634 VD_GEOM2DK_GEOM((vd_geom_t *)from, &geom); 4635 rv = ddi_copyout(&geom, to, copy_len, mode); 4636 if (rv != 0) 4637 rv = EFAULT; 4638 4639 return (rv); 4640 } 4641 4642 /* 4643 * Function: 4644 * vdc_set_geom_convert() 4645 * 4646 * Description: 4647 * This routine performs the necessary convertions from the DKIOCSGEOM 4648 * Solaris structure to the format defined in FWARC 2006/195. 
4649 * 4650 * Arguments: 4651 * vdc - the vDisk client 4652 * from - Buffer with data 4653 * to - Buffer where data is to be copied to 4654 * mode - flags passed to ioctl 4655 * dir - direction of copy (in or out) 4656 * 4657 * Return Code: 4658 * 0 - Success 4659 * ENXIO - Invalid buffer passed in 4660 * EFAULT - ddi_copyin of data failed 4661 */ 4662 static int 4663 vdc_set_geom_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 4664 { 4665 _NOTE(ARGUNUSED(vdc)) 4666 4667 vd_geom_t vdgeom; 4668 void *tmp_mem = NULL; 4669 int copy_len = sizeof (struct dk_geom); 4670 int rv = 0; 4671 4672 if (dir != VD_COPYIN) 4673 return (0); /* nothing to do */ 4674 4675 if ((from == NULL) || (to == NULL)) 4676 return (ENXIO); 4677 4678 tmp_mem = kmem_alloc(copy_len, KM_SLEEP); 4679 4680 rv = ddi_copyin(from, tmp_mem, copy_len, mode); 4681 if (rv != 0) { 4682 kmem_free(tmp_mem, copy_len); 4683 return (EFAULT); 4684 } 4685 DK_GEOM2VD_GEOM((struct dk_geom *)tmp_mem, &vdgeom); 4686 bcopy(&vdgeom, to, sizeof (vdgeom)); 4687 kmem_free(tmp_mem, copy_len); 4688 4689 return (0); 4690 } 4691 4692 static int 4693 vdc_get_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 4694 { 4695 _NOTE(ARGUNUSED(vdc)) 4696 4697 vd_efi_t *vd_efi; 4698 dk_efi_t dk_efi; 4699 int rv = 0; 4700 void *uaddr; 4701 4702 if ((from == NULL) || (to == NULL)) 4703 return (ENXIO); 4704 4705 if (dir == VD_COPYIN) { 4706 4707 vd_efi = (vd_efi_t *)to; 4708 4709 rv = ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode); 4710 if (rv != 0) 4711 return (EFAULT); 4712 4713 vd_efi->lba = dk_efi.dki_lba; 4714 vd_efi->length = dk_efi.dki_length; 4715 bzero(vd_efi->data, vd_efi->length); 4716 4717 } else { 4718 4719 rv = ddi_copyin(to, &dk_efi, sizeof (dk_efi_t), mode); 4720 if (rv != 0) 4721 return (EFAULT); 4722 4723 uaddr = dk_efi.dki_data; 4724 4725 dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP); 4726 4727 VD_EFI2DK_EFI((vd_efi_t *)from, &dk_efi); 4728 4729 rv = ddi_copyout(dk_efi.dki_data, 
uaddr, dk_efi.dki_length, 4730 mode); 4731 if (rv != 0) 4732 return (EFAULT); 4733 4734 kmem_free(dk_efi.dki_data, dk_efi.dki_length); 4735 } 4736 4737 return (0); 4738 } 4739 4740 static int 4741 vdc_set_efi_convert(vdc_t *vdc, void *from, void *to, int mode, int dir) 4742 { 4743 _NOTE(ARGUNUSED(vdc)) 4744 4745 dk_efi_t dk_efi; 4746 void *uaddr; 4747 4748 if (dir == VD_COPYOUT) 4749 return (0); /* nothing to do */ 4750 4751 if ((from == NULL) || (to == NULL)) 4752 return (ENXIO); 4753 4754 if (ddi_copyin(from, &dk_efi, sizeof (dk_efi_t), mode) != 0) 4755 return (EFAULT); 4756 4757 uaddr = dk_efi.dki_data; 4758 4759 dk_efi.dki_data = kmem_alloc(dk_efi.dki_length, KM_SLEEP); 4760 4761 if (ddi_copyin(uaddr, dk_efi.dki_data, dk_efi.dki_length, mode) != 0) 4762 return (EFAULT); 4763 4764 DK_EFI2VD_EFI(&dk_efi, (vd_efi_t *)to); 4765 4766 kmem_free(dk_efi.dki_data, dk_efi.dki_length); 4767 4768 return (0); 4769 } 4770 4771 /* 4772 * Function: 4773 * vdc_create_fake_geometry() 4774 * 4775 * Description: 4776 * This routine fakes up the disk info needed for some DKIO ioctls. 4777 * - DKIOCINFO 4778 * - DKIOCGMEDIAINFO 4779 * 4780 * [ just like lofi(7D) and ramdisk(7D) ] 4781 * 4782 * Arguments: 4783 * vdc - soft state pointer for this instance of the device driver. 
4784 * 4785 * Return Code: 4786 * 0 - Success 4787 */ 4788 static int 4789 vdc_create_fake_geometry(vdc_t *vdc) 4790 { 4791 ASSERT(vdc != NULL); 4792 4793 /* 4794 * Check if max_xfer_sz and vdisk_size are valid 4795 */ 4796 if (vdc->vdisk_size == 0 || vdc->max_xfer_sz == 0) 4797 return (EIO); 4798 4799 /* 4800 * DKIOCINFO support 4801 */ 4802 vdc->cinfo = kmem_zalloc(sizeof (struct dk_cinfo), KM_SLEEP); 4803 4804 (void) strcpy(vdc->cinfo->dki_cname, VDC_DRIVER_NAME); 4805 (void) strcpy(vdc->cinfo->dki_dname, VDC_DRIVER_NAME); 4806 /* max_xfer_sz is #blocks so we don't need to divide by DEV_BSIZE */ 4807 vdc->cinfo->dki_maxtransfer = vdc->max_xfer_sz; 4808 vdc->cinfo->dki_ctype = DKC_SCSI_CCS; 4809 vdc->cinfo->dki_flags = DKI_FMTVOL; 4810 vdc->cinfo->dki_cnum = 0; 4811 vdc->cinfo->dki_addr = 0; 4812 vdc->cinfo->dki_space = 0; 4813 vdc->cinfo->dki_prio = 0; 4814 vdc->cinfo->dki_vec = 0; 4815 vdc->cinfo->dki_unit = vdc->instance; 4816 vdc->cinfo->dki_slave = 0; 4817 /* 4818 * The partition number will be created on the fly depending on the 4819 * actual slice (i.e. minor node) that is used to request the data. 4820 */ 4821 vdc->cinfo->dki_partition = 0; 4822 4823 /* 4824 * DKIOCGMEDIAINFO support 4825 */ 4826 if (vdc->minfo == NULL) 4827 vdc->minfo = kmem_zalloc(sizeof (struct dk_minfo), KM_SLEEP); 4828 vdc->minfo->dki_media_type = DK_FIXED_DISK; 4829 vdc->minfo->dki_capacity = vdc->vdisk_size; 4830 vdc->minfo->dki_lbsize = DEV_BSIZE; 4831 4832 return (0); 4833 } 4834 4835 /* 4836 * Function: 4837 * vdc_setup_disk_layout() 4838 * 4839 * Description: 4840 * This routine discovers all the necessary details about the "disk" 4841 * by requesting the data that is available from the vDisk server and by 4842 * faking up the rest of the data. 4843 * 4844 * Arguments: 4845 * vdc - soft state pointer for this instance of the device driver. 
4846 * 4847 * Return Code: 4848 * 0 - Success 4849 */ 4850 static int 4851 vdc_setup_disk_layout(vdc_t *vdc) 4852 { 4853 buf_t *buf; /* BREAD requests need to be in a buf_t structure */ 4854 dev_t dev; 4855 int slice = 0; 4856 int rv, error; 4857 4858 ASSERT(vdc != NULL); 4859 4860 if (vdc->vtoc == NULL) 4861 vdc->vtoc = kmem_zalloc(sizeof (struct vtoc), KM_SLEEP); 4862 4863 dev = makedevice(ddi_driver_major(vdc->dip), 4864 VD_MAKE_DEV(vdc->instance, 0)); 4865 rv = vd_process_ioctl(dev, DKIOCGVTOC, (caddr_t)vdc->vtoc, FKIOCTL); 4866 4867 if (rv && rv != ENOTSUP) { 4868 DMSG(vdc, 0, "[%d] Failed to get VTOC (err=%d)", 4869 vdc->instance, rv); 4870 return (rv); 4871 } 4872 4873 /* 4874 * The process of attempting to read VTOC will initiate 4875 * the handshake and establish a connection. Following 4876 * handshake, go ahead and create geometry. 4877 */ 4878 error = vdc_create_fake_geometry(vdc); 4879 if (error != 0) { 4880 DMSG(vdc, 0, "[%d] Failed to create disk geometry (err%d)", 4881 vdc->instance, error); 4882 return (error); 4883 } 4884 4885 if (rv == ENOTSUP) { 4886 /* 4887 * If the device does not support VTOC then we try 4888 * to read an EFI label. 4889 */ 4890 struct dk_gpt *efi; 4891 size_t efi_len; 4892 4893 rv = vdc_efi_alloc_and_read(dev, &efi, &efi_len); 4894 4895 if (rv) { 4896 DMSG(vdc, 0, "[%d] Failed to get EFI (err=%d)", 4897 vdc->instance, rv); 4898 return (rv); 4899 } 4900 4901 vdc->vdisk_label = VD_DISK_LABEL_EFI; 4902 vdc_store_efi(vdc, efi); 4903 vd_efi_free(efi, efi_len); 4904 4905 return (0); 4906 } 4907 4908 vdc->vdisk_label = VD_DISK_LABEL_VTOC; 4909 4910 /* 4911 * FUTURE: This could be default way for reading the VTOC 4912 * from the disk as supposed to sending the VD_OP_GET_VTOC 4913 * to the server. Currently this is a sanity check. 4914 * 4915 * find the slice that represents the entire "disk" and use that to 4916 * read the disk label. 
The convention in Solaris is that slice 2 4917 * represents the whole disk so we check that it is, otherwise we 4918 * default to slice 0 4919 */ 4920 if ((vdc->vdisk_type == VD_DISK_TYPE_DISK) && 4921 (vdc->vtoc->v_part[2].p_tag == V_BACKUP)) { 4922 slice = 2; 4923 } else { 4924 slice = 0; 4925 } 4926 4927 /* 4928 * Read disk label from start of disk 4929 */ 4930 vdc->label = kmem_zalloc(DK_LABEL_SIZE, KM_SLEEP); 4931 buf = kmem_alloc(sizeof (buf_t), KM_SLEEP); 4932 bioinit(buf); 4933 buf->b_un.b_addr = (caddr_t)vdc->label; 4934 buf->b_bcount = DK_LABEL_SIZE; 4935 buf->b_flags = B_BUSY | B_READ; 4936 buf->b_dev = dev; 4937 rv = vdc_send_request(vdc, VD_OP_BREAD, (caddr_t)vdc->label, 4938 DK_LABEL_SIZE, slice, 0, CB_STRATEGY, buf, VIO_read_dir); 4939 if (rv) { 4940 DMSG(vdc, 1, "[%d] Failed to read disk block 0\n", 4941 vdc->instance); 4942 kmem_free(buf, sizeof (buf_t)); 4943 return (rv); 4944 } 4945 rv = biowait(buf); 4946 biofini(buf); 4947 kmem_free(buf, sizeof (buf_t)); 4948 4949 return (rv); 4950 } 4951 4952 /* 4953 * Function: 4954 * vdc_setup_devid() 4955 * 4956 * Description: 4957 * This routine discovers the devid of a vDisk. It requests the devid of 4958 * the underlying device from the vDisk server, builds an encapsulated 4959 * devid based on the retrieved devid and registers that new devid to 4960 * the vDisk. 4961 * 4962 * Arguments: 4963 * vdc - soft state pointer for this instance of the device driver. 4964 * 4965 * Return Code: 4966 * 0 - A devid was succesfully registered for the vDisk 4967 */ 4968 static int 4969 vdc_setup_devid(vdc_t *vdc) 4970 { 4971 int rv; 4972 vd_devid_t *vd_devid; 4973 size_t bufsize, bufid_len; 4974 4975 /* 4976 * At first sight, we don't know the size of the devid that the 4977 * server will return but this size will be encoded into the 4978 * reply. So we do a first request using a default size then we 4979 * check if this size was large enough. 
If not then we do a second 4980 * request with the correct size returned by the server. Note that 4981 * ldc requires size to be 8-byte aligned. 4982 */ 4983 bufsize = P2ROUNDUP(VD_DEVID_SIZE(VD_DEVID_DEFAULT_LEN), 4984 sizeof (uint64_t)); 4985 vd_devid = kmem_zalloc(bufsize, KM_SLEEP); 4986 bufid_len = bufsize - sizeof (vd_efi_t) - 1; 4987 4988 rv = vdc_do_sync_op(vdc, VD_OP_GET_DEVID, (caddr_t)vd_devid, 4989 bufsize, 0, 0, CB_SYNC, 0, VIO_both_dir); 4990 4991 DMSG(vdc, 2, "sync_op returned %d\n", rv); 4992 4993 if (rv) { 4994 kmem_free(vd_devid, bufsize); 4995 return (rv); 4996 } 4997 4998 if (vd_devid->length > bufid_len) { 4999 /* 5000 * The returned devid is larger than the buffer used. Try again 5001 * with a buffer with the right size. 5002 */ 5003 kmem_free(vd_devid, bufsize); 5004 bufsize = P2ROUNDUP(VD_DEVID_SIZE(vd_devid->length), 5005 sizeof (uint64_t)); 5006 vd_devid = kmem_zalloc(bufsize, KM_SLEEP); 5007 bufid_len = bufsize - sizeof (vd_efi_t) - 1; 5008 5009 rv = vdc_do_sync_op(vdc, VD_OP_GET_DEVID, 5010 (caddr_t)vd_devid, bufsize, 0, 0, CB_SYNC, 0, 5011 VIO_both_dir); 5012 5013 if (rv) { 5014 kmem_free(vd_devid, bufsize); 5015 return (rv); 5016 } 5017 } 5018 5019 /* 5020 * The virtual disk should have the same device id as the one associated 5021 * with the physical disk it is mapped on, otherwise sharing a disk 5022 * between a LDom and a non-LDom may not work (for example for a shared 5023 * SVM disk set). 5024 * 5025 * The DDI framework does not allow creating a device id with any 5026 * type so we first create a device id of type DEVID_ENCAP and then 5027 * we restore the orignal type of the physical device. 
5028 */ 5029 5030 DMSG(vdc, 2, ": devid length = %d\n", vd_devid->length); 5031 5032 /* build an encapsulated devid based on the returned devid */ 5033 if (ddi_devid_init(vdc->dip, DEVID_ENCAP, vd_devid->length, 5034 vd_devid->id, &vdc->devid) != DDI_SUCCESS) { 5035 DMSG(vdc, 1, "[%d] Fail to created devid\n", vdc->instance); 5036 kmem_free(vd_devid, bufsize); 5037 return (1); 5038 } 5039 5040 DEVID_FORMTYPE((impl_devid_t *)vdc->devid, vd_devid->type); 5041 5042 ASSERT(ddi_devid_valid(vdc->devid) == DDI_SUCCESS); 5043 5044 kmem_free(vd_devid, bufsize); 5045 5046 if (ddi_devid_register(vdc->dip, vdc->devid) != DDI_SUCCESS) { 5047 DMSG(vdc, 1, "[%d] Fail to register devid\n", vdc->instance); 5048 return (1); 5049 } 5050 5051 return (0); 5052 } 5053 5054 static void 5055 vdc_store_efi(vdc_t *vdc, struct dk_gpt *efi) 5056 { 5057 struct vtoc *vtoc = vdc->vtoc; 5058 5059 vd_efi_to_vtoc(efi, vtoc); 5060 if (vdc->vdisk_type == VD_DISK_TYPE_SLICE) { 5061 /* 5062 * vd_efi_to_vtoc() will store information about the EFI Sun 5063 * reserved partition (representing the entire disk) into 5064 * partition 7. However single-slice device will only have 5065 * that single partition and the vdc driver expects to find 5066 * information about that partition in slice 0. So we need 5067 * to copy information from slice 7 to slice 0. 5068 */ 5069 vtoc->v_part[0].p_tag = vtoc->v_part[VD_EFI_WD_SLICE].p_tag; 5070 vtoc->v_part[0].p_flag = vtoc->v_part[VD_EFI_WD_SLICE].p_flag; 5071 vtoc->v_part[0].p_start = vtoc->v_part[VD_EFI_WD_SLICE].p_start; 5072 vtoc->v_part[0].p_size = vtoc->v_part[VD_EFI_WD_SLICE].p_size; 5073 } 5074 } 5075